From 033de60cad123bfceacb261cd55802cba40d657f Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Fri, 7 Nov 2025 15:33:51 +0200 Subject: [PATCH 01/22] 390 convert StepDecay to Numpower --- docs/neural-network/optimizers/step-decay.md | 4 +- .../Optimizers/StepDecay/StepDecay.php | 115 ++++++++++++++++++ .../Optimizers/StepDecay/StepDecayTest.php | 100 +++++++++++++++ 3 files changed, 217 insertions(+), 2 deletions(-) create mode 100644 src/NeuralNet/Optimizers/StepDecay/StepDecay.php create mode 100644 tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php diff --git a/docs/neural-network/optimizers/step-decay.md b/docs/neural-network/optimizers/step-decay.md index 1a21f0804..0ec9395cc 100644 --- a/docs/neural-network/optimizers/step-decay.md +++ b/docs/neural-network/optimizers/step-decay.md @@ -12,7 +12,7 @@ A learning rate decay optimizer that reduces the global learning rate by a facto ## Example ```php -use Rubix\ML\NeuralNet\Optimizers\StepDecay; +use Rubix\ML\NeuralNet\Optimizers\StepDecay\StepDecay; $optimizer = new StepDecay(0.1, 50, 1e-3); -``` \ No newline at end of file +``` diff --git a/src/NeuralNet/Optimizers/StepDecay/StepDecay.php b/src/NeuralNet/Optimizers/StepDecay/StepDecay.php new file mode 100644 index 000000000..246adc6c7 --- /dev/null +++ b/src/NeuralNet/Optimizers/StepDecay/StepDecay.php @@ -0,0 +1,115 @@ + + */ +class StepDecay implements Optimizer +{ + /** + * The learning rate that controls the global step size. + * + * @var float + */ + protected float $rate; + + /** + * The size of every floor in steps. i.e. the number of steps to take before applying another factor of decay. + * + * @var int + */ + protected int $losses; + + /** + * The factor to decrease the learning rate by over a period of k steps. + * + * @var float + */ + protected float $decay; + + /** + * The number of steps taken so far. + * + * @var int + */ + protected int $steps = 0; + + /** + * @param float $rate + * @param int $losses + * @param float $decay + * @throws InvalidArgumentException + */ + public function __construct(float $rate = 0.01, int $losses = 100, float $decay = 1e-3) + { + if ($rate <= 0.0) { + throw new InvalidArgumentException( + "Learning rate must be greater than 0, $rate given." + ); + } + + if ($losses < 1) { + throw new InvalidArgumentException( + "The number of steps per floor must be greater than 0, $losses given." + ); + } + + if ($decay < 0.0) { + throw new InvalidArgumentException( + "Decay rate must be positive, $decay given." + ); + } + + $this->rate = $rate; + $this->losses = $losses; + $this->decay = $decay; + } + + /** + * Take a step of gradient descent for a given parameter. + * + * @internal + * + * @param Parameter $param + * @param NDArray $gradient + * @return NDArray + */ + public function step(Parameter $param, NDArray $gradient) : NDArray + { + $floor = floor($this->steps / $this->losses); + + $rate = $this->rate * (1.0 / (1.0 + $floor * $this->decay)); + + ++$this->steps; + + return NumPower::multiply($gradient, $rate); + } + + /** + * Return the string representation of the object. 
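+     *
+     * e.g. "Step Decay (rate: 0.01, steps: 100, decay: 0.001)" with the default hyper-parameters.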
+ * + * @internal + * + * @return string + */ + public function __toString() : string + { + return "Step Decay (rate: {$this->rate}, steps: {$this->losses}, decay: {$this->decay})"; + } +} diff --git a/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php b/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php new file mode 100644 index 000000000..f3535552b --- /dev/null +++ b/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php @@ -0,0 +1,100 @@ +optimizer = new StepDecay(rate: 0.001); + } + + #[Test] + #[TestDox('Throws exception when constructed with invalid learning rate')] + public function testConstructorWithInvalidRate() : void + { + $this->expectException(InvalidArgumentException::class); + + new StepDecay(rate: 0.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with invalid losses')] + public function testConstructorWithInvalidLosses() : void + { + $this->expectException(InvalidArgumentException::class); + + new StepDecay(rate: 0.01, losses: 0); + } + + #[Test] + #[TestDox('Throws exception when constructed with invalid decay')] + public function testConstructorWithInvalidDecay() : void + { + $this->expectException(InvalidArgumentException::class); + + new StepDecay(rate: 0.01, losses: 100, decay: -0.1); + } + + #[Test] + #[TestDox('Can be cast to a string')] + public function testToString() : void + { + self::assertEquals('Step Decay (rate: 0.001, steps: 100, decay: 0.001)', (string) $this->optimizer); + } + + /** + * @param Parameter $param + * @param NDArray $gradient + * @param list> $expected + */ + #[DataProvider('stepProvider')] + public function testStep(Parameter $param, NDArray $gradient, array $expected) : void + { + $step = $this->optimizer->step(param: $param, gradient: $gradient); + + self::assertEqualsWithDelta($expected, $step->toArray(), 1e-7); + } +} + From a02c4a08dc490ff158174a4890e8facb91140d23 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Fri, 7 Nov 2025 17:36:38 +0200 Subject: [PATCH 02/22] 390 convert RMSProp to Numpower --- docs/neural-network/optimizers/rms-prop.md | 4 +- src/NeuralNet/Optimizers/Base/Adaptive.php | 25 +++ src/NeuralNet/Optimizers/RMSProp/RMSProp.php | 158 ++++++++++++++++++ .../Optimizers/RMSProp/RMSPropTest.php | 127 ++++++++++++++ 4 files changed, 312 insertions(+), 2 deletions(-) create mode 100644 src/NeuralNet/Optimizers/Base/Adaptive.php create mode 100644 src/NeuralNet/Optimizers/RMSProp/RMSProp.php create mode 100644 tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php diff --git a/docs/neural-network/optimizers/rms-prop.md b/docs/neural-network/optimizers/rms-prop.md index fdca6fd05..ae6b847bc 100644 --- a/docs/neural-network/optimizers/rms-prop.md +++ b/docs/neural-network/optimizers/rms-prop.md @@ -11,10 +11,10 @@ An adaptive gradient technique that divides the current gradient over a rolling ## Example ```php -use Rubix\ML\NeuralNet\Optimizers\RMSProp; +use Rubix\ML\NeuralNet\Optimizers\RMSProp\RMSProp; $optimizer = new RMSProp(0.01, 0.1); ``` ## References -[^1]: T. Tieleman et al. (2012). Lecture 6e rmsprop: Divide the gradient by a running average of its recent magnitude. \ No newline at end of file +[^1]: T. Tieleman et al. (2012). Lecture 6e rmsprop: Divide the gradient by a running average of its recent magnitude. 
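For orientation, a minimal sketch of the warm/step lifecycle that the `Adaptive` interface below formalizes, mirroring the usage in the accompanying tests (the tensor values here are arbitrary):

```php
use NumPower;
use Rubix\ML\NeuralNet\Parameters\Parameter;
use Rubix\ML\NeuralNet\Optimizers\RMSProp\RMSProp;

// Wrap the weights in a Parameter and create the optimizer.
$param = new Parameter(NumPower::array([
    [0.1, 0.6, -0.4],
    [0.5, 0.6, -0.4],
]));

$optimizer = new RMSProp(rate: 0.001, decay: 0.1);

// Adaptive optimizers are warmed once per parameter to allocate their cache.
$optimizer->warm($param);

// Each training step hands the optimizer the current gradient and receives
// the NDArray step to apply to the parameter.
$gradient = NumPower::array([
    [0.01, 0.05, -0.02],
    [-0.01, 0.02, 0.03],
]);

$step = $optimizer->step(param: $param, gradient: $gradient);
```

warm() allocates the per-parameter cache once, after which step() returns the step tensor that the training loop applies to the parameter.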
diff --git a/src/NeuralNet/Optimizers/Base/Adaptive.php b/src/NeuralNet/Optimizers/Base/Adaptive.php new file mode 100644 index 000000000..35ee5323b --- /dev/null +++ b/src/NeuralNet/Optimizers/Base/Adaptive.php @@ -0,0 +1,25 @@ + + */ +interface Adaptive extends Optimizer +{ + /** + * Warm the parameter cache. + * + * @param Parameter $param + */ + public function warm(Parameter $param) : void; +} diff --git a/src/NeuralNet/Optimizers/RMSProp/RMSProp.php b/src/NeuralNet/Optimizers/RMSProp/RMSProp.php new file mode 100644 index 000000000..531f3ad80 --- /dev/null +++ b/src/NeuralNet/Optimizers/RMSProp/RMSProp.php @@ -0,0 +1,158 @@ + + */ +class RMSProp implements Optimizer, Adaptive +{ + /** + * The learning rate that controls the global step size. + * + * @var float + */ + protected float $rate; + + /** + * The rms decay rate. + * + * @var float + */ + protected float $decay; + + /** + * The opposite of the rms decay rate. + * + * @var float + */ + protected float $rho; + + /** + * The cache of running squared gradients. + * + * @var array + */ + protected array $cache = [ + // + ]; + + /** + * @param float $rate + * @param float $decay + * @throws InvalidArgumentException + */ + public function __construct(float $rate = 0.001, float $decay = 0.1) + { + if ($rate <= 0.0) { + throw new InvalidArgumentException( + "Learning rate must be greater than 0, $rate given." + ); + } + + if ($decay <= 0.0 or $decay >= 1.0) { + throw new InvalidArgumentException( + "Decay must be between 0 and 1, $decay given." + ); + } + + $this->rate = $rate; + $this->decay = $decay; + $this->rho = 1.0 - $decay; + } + + /** + * Warm the parameter cache. + * + * @internal + * + * @param Parameter $param + * @throws RuntimeException + */ + public function warm(Parameter $param) : void + { + $class = get_class($param->param()); + + if (!$class) { + throw new RuntimeException('Could not locate parameter class.'); + } + + $this->cache[$param->id()] = NumPower::zeros($param->param()->shape()); + } + + /** + * Take a step of gradient descent for a given parameter. + * + * RMSProp update (element-wise): + * v_t = ρ · v_{t-1} + (1 − ρ) · g_t^2 + * Δθ_t = η · g_t / max(sqrt(v_t), ε) + * + * where: + * - g_t is the current gradient, + * - v_t is the running average of squared gradients, + * - ρ = 1 − decay, η is the learning rate, + * - ε is a small constant to avoid division by zero (implemented by clipping √v_t to [ε, +∞)). + * + * @internal + * + * @param Parameter $param + * @param NDArray $gradient + * @return NDArray + */ + public function step(Parameter $param, NDArray $gradient) : NDArray + { + $norm = $this->cache[$param->id()]; + + $norm = NumPower::add( + NumPower::multiply($norm, $this->rho), + NumPower::multiply(NumPower::square($gradient), $this->decay) + ); + + $this->cache[$param->id()] = $norm; + + $denominator = NumPower::sqrt($norm); + $denominator = NumPower::clip($denominator, EPSILON, PHP_FLOAT_MAX); + + return NumPower::divide( + NumPower::multiply($gradient, $this->rate), + $denominator + ); + } + + /** + * Return the string representation of the object. 
+ * + * @internal + * + * @return string + */ + public function __toString() : string + { + return "RMS Prop (rate: {$this->rate}, decay: {$this->decay})"; + } +} diff --git a/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php b/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php new file mode 100644 index 000000000..456bd54c0 --- /dev/null +++ b/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php @@ -0,0 +1,127 @@ +optimizer = new RMSProp(rate: 0.001, decay: 0.1); + } + + #[Test] + #[TestDox('Throws exception when constructed with zero rate')] + public function testConstructorWithZeroRate() : void + { + $this->expectException(InvalidArgumentException::class); + + new RMSProp(rate: 0.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with negative rate')] + public function testConstructorWithNegativeRate() : void + { + $this->expectException(InvalidArgumentException::class); + + new RMSProp(rate: -0.001); + } + + #[Test] + #[TestDox('Throws exception when constructed with zero decay')] + public function testConstructorWithZeroDecay() : void + { + $this->expectException(InvalidArgumentException::class); + + new RMSProp(rate: 0.001, decay: 0.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with decay equal to 1')] + public function testConstructorWithDecayEqualToOne() : void + { + $this->expectException(InvalidArgumentException::class); + + new RMSProp(rate: 0.001, decay: 1.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with decay greater than 1')] + public function testConstructorWithDecayGreaterThanOne() : void + { + $this->expectException(InvalidArgumentException::class); + + new RMSProp(rate: 0.001, decay: 1.5); + } + + #[Test] + #[TestDox('Throws exception when constructed with negative decay')] + public function testConstructorWithNegativeDecay() : void + { + $this->expectException(InvalidArgumentException::class); + + new RMSProp(rate: 0.001, decay: -0.1); + } + + #[Test] + #[TestDox('Can be cast to a string')] + public function testToString() : void + { + self::assertEquals('RMS Prop (rate: 0.001, decay: 0.1)', (string) $this->optimizer); + } + + /** + * @param Parameter $param + * @param NDArray $gradient + * @param list> $expected + */ + #[DataProvider('stepProvider')] + public function testStep(Parameter $param, NDArray $gradient, array $expected) : void + { + $this->optimizer->warm($param); + + $step = $this->optimizer->step(param: $param, gradient: $gradient); + + self::assertEqualsWithDelta($expected, $step->toArray(), 1e-7); + } +} From cccfa79140a0769604b5d92e36dd300a89405dff Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Fri, 7 Nov 2025 17:45:33 +0200 Subject: [PATCH 03/22] 390 added math explanation for step() methods --- src/NeuralNet/Optimizers/StepDecay/StepDecay.php | 12 ++++++++++++ src/NeuralNet/Optimizers/Stochastic/Stochastic.php | 7 +++++++ 2 files changed, 19 insertions(+) diff --git a/src/NeuralNet/Optimizers/StepDecay/StepDecay.php b/src/NeuralNet/Optimizers/StepDecay/StepDecay.php index 246adc6c7..abfeb6f7e 100644 --- a/src/NeuralNet/Optimizers/StepDecay/StepDecay.php +++ b/src/NeuralNet/Optimizers/StepDecay/StepDecay.php @@ -84,6 +84,18 @@ public function __construct(float $rate = 0.01, int $losses = 100, float $decay /** * Take a step of gradient descent for a given parameter. 
* + * Step Decay update (element-wise): + * floor = ⌊t / k⌋ + * η_t = η₀ / (1 + floor · λ) + * Δθ_t = η_t · g_t + * + * where: + * - t is the current step number, + * - k is the number of steps per floor, + * - η₀ is the initial learning rate, + * - λ is the decay factor, + * - g_t is the current gradient. + * * @internal * * @param Parameter $param diff --git a/src/NeuralNet/Optimizers/Stochastic/Stochastic.php b/src/NeuralNet/Optimizers/Stochastic/Stochastic.php index ffd9daf30..004489a78 100644 --- a/src/NeuralNet/Optimizers/Stochastic/Stochastic.php +++ b/src/NeuralNet/Optimizers/Stochastic/Stochastic.php @@ -44,6 +44,13 @@ public function __construct(float $rate = 0.01) /** * Take a step of gradient descent for a given parameter. * + * SGD update (element-wise): + * Δθ_t = η · g_t + * + * where: + * - g_t is the current gradient, + * - η is the learning rate. + * * @internal * * @param Parameter $param From f1c55e67537832e5e49a299a639fd3cfe163d5d2 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Fri, 7 Nov 2025 23:02:43 +0200 Subject: [PATCH 04/22] 390 convert Momentum to Numpower --- docs/neural-network/optimizers/momentum.md | 2 +- .../Optimizers/Momentum/Momentum.php | 164 ++++++++++++++++++ src/NeuralNet/Optimizers/RMSProp/RMSProp.php | 2 +- .../Optimizers/Momentum/MomentumTest.php | 154 ++++++++++++++++ .../Optimizers/RMSProp/RMSPropTest.php | 27 +++ 5 files changed, 347 insertions(+), 2 deletions(-) create mode 100644 src/NeuralNet/Optimizers/Momentum/Momentum.php create mode 100644 tests/NeuralNet/Optimizers/Momentum/MomentumTest.php diff --git a/docs/neural-network/optimizers/momentum.md b/docs/neural-network/optimizers/momentum.md index 7556ca008..017cf0efa 100644 --- a/docs/neural-network/optimizers/momentum.md +++ b/docs/neural-network/optimizers/momentum.md @@ -12,7 +12,7 @@ Momentum accelerates each update step by accumulating velocity from past updates ## Example ```php -use Rubix\ML\NeuralNet\Optimizers\Momentum; +use Rubix\ML\NeuralNet\Optimizers\Momentum\Momentum; $optimizer = new Momentum(0.01, 0.1, true); ``` diff --git a/src/NeuralNet/Optimizers/Momentum/Momentum.php b/src/NeuralNet/Optimizers/Momentum/Momentum.php new file mode 100644 index 000000000..05e62fa0b --- /dev/null +++ b/src/NeuralNet/Optimizers/Momentum/Momentum.php @@ -0,0 +1,164 @@ + + */ +class Momentum implements Optimizer, Adaptive +{ + /** + * The learning rate that controls the global step size. + * + * @var float + */ + protected float $rate; + + /** + * The rate at which the momentum force decays. + * + * @var float + */ + protected float $decay; + + /** + * Should we employ Nesterov's lookahead (NAG) when updating the parameters? + * + * @var bool + */ + protected bool $lookahead; + + /** + * The parameter cache of velocity NDArrays. + * + * @var NDArray[] + */ + protected array $cache = [ + // + ]; + + /** + * @param float $rate + * @param float $decay + * @param bool $lookahead + * @throws InvalidArgumentException + */ + public function __construct(float $rate = 0.001, float $decay = 0.1, bool $lookahead = false) + { + if ($rate <= 0.0) { + throw new InvalidArgumentException( + "Learning rate must be greater than 0, $rate given." + ); + } + + if ($decay <= 0.0 or $decay >= 1.0) { + throw new InvalidArgumentException( + "Decay must be between 0 and 1, $decay given." + ); + } + + $this->rate = $rate; + $this->decay = $decay; + $this->lookahead = $lookahead; + } + + /** + * Warm the cache. 
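+     *
+     * Initializes the velocity cache for this parameter to an all-zeros tensor with the same shape as the underlying parameter tensor.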
+ * + * @internal + * + * @param Parameter $param + * @throws RuntimeException + */ + public function warm(Parameter $param) : void + { + $class = get_class($param->param()); + + if (!$class) { + throw new RuntimeException('Could not locate parameter class.'); + } + + $this->cache[$param->id()] = NumPower::zeros($param->param()->shape()); + } + + /** + * Take a step of gradient descent for a given parameter. + * + * Mathematical formulation (per-parameter element): + * - Velocity update: v_t = β · v_{t-1} + η · g_t + * where β = 1 − decay and η = rate, and g_t is the current gradient. + * - Returned step (the amount added to the parameter by the trainer): Δθ_t = v_t + * + * Nesterov lookahead (when lookahead = true): + * - We apply the same velocity update a second time to approximate NAG: + * v_t ← β · v_t + η · g_t + * + * Notes: + * - This method updates and caches the velocity tensor per Parameter id. + * - The actual parameter update is performed by the training loop using the returned velocity. + * + * @internal + * + * @param Parameter $param + * @param NDArray $gradient + * @return NDArray + */ + public function step(Parameter $param, NDArray $gradient) : NDArray + { + $velocity = $this->cache[$param->id()]; + + // velocity = gradient * rate + velocity * (1 - decay) + $velocity = NumPower::add( + NumPower::multiply($gradient, $this->rate), + NumPower::multiply($velocity, 1.0 - $this->decay) + ); + + $this->cache[$param->id()] = $velocity; + + if ($this->lookahead) { + // Apply lookahead: velocity = gradient * rate + velocity * (1 - decay) + $velocity = NumPower::add( + NumPower::multiply($gradient, $this->rate), + NumPower::multiply($velocity, 1.0 - $this->decay) + ); + } + + return $velocity; + } + + /** + * Return the string representation of the object. + * + * @internal + * + * @return string + */ + public function __toString() : string + { + return "Momentum (rate: {$this->rate}, decay: {$this->decay}," + . ' lookahead: ' . Params::toString($this->lookahead) . ')'; + } +} diff --git a/src/NeuralNet/Optimizers/RMSProp/RMSProp.php b/src/NeuralNet/Optimizers/RMSProp/RMSProp.php index 531f3ad80..7c08aebb2 100644 --- a/src/NeuralNet/Optimizers/RMSProp/RMSProp.php +++ b/src/NeuralNet/Optimizers/RMSProp/RMSProp.php @@ -56,7 +56,7 @@ class RMSProp implements Optimizer, Adaptive /** * The cache of running squared gradients. 
* - * @var array + * @var NDArray[] */ protected array $cache = [ // diff --git a/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php b/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php new file mode 100644 index 000000000..1b2a90378 --- /dev/null +++ b/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php @@ -0,0 +1,154 @@ +optimizer = new Momentum(rate: 0.001, decay: 0.1, lookahead: false); + } + + #[Test] + #[TestDox('Throws exception when constructed with zero rate')] + public function testConstructorWithZeroRate() : void + { + $this->expectException(InvalidArgumentException::class); + + new Momentum(rate: 0.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with negative rate')] + public function testConstructorWithNegativeRate() : void + { + $this->expectException(InvalidArgumentException::class); + + new Momentum(rate: -0.001); + } + + #[Test] + #[TestDox('Throws exception when constructed with zero decay')] + public function testConstructorWithZeroDecay() : void + { + $this->expectException(InvalidArgumentException::class); + + new Momentum(rate: 0.001, decay: 0.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with decay equal to 1')] + public function testConstructorWithDecayEqualToOne() : void + { + $this->expectException(InvalidArgumentException::class); + + new Momentum(rate: 0.001, decay: 1.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with decay greater than 1')] + public function testConstructorWithDecayGreaterThanOne() : void + { + $this->expectException(InvalidArgumentException::class); + + new Momentum(rate: 0.001, decay: 1.5); + } + + #[Test] + #[TestDox('Throws exception when constructed with negative decay')] + public function testConstructorWithNegativeDecay() : void + { + $this->expectException(InvalidArgumentException::class); + + new Momentum(rate: 0.001, decay: -0.1); + } + + #[Test] + #[TestDox('Can be cast to a string')] + public function testToString() : void + { + self::assertEquals('Momentum (rate: 0.001, decay: 0.1, lookahead: false)', (string) $this->optimizer); + } + + #[Test] + #[TestDox('Warm initializes a zeroed velocity cache with the parameter\'s shape')] + public function testWarmInitializesZeroedCache() : void + { + $param = new Parameter(NumPower::array([ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ])); + + // Warm the optimizer for this parameter + $this->optimizer->warm($param); + + // Use reflection to read the protected cache + $ref = new \ReflectionClass($this->optimizer); + $prop = $ref->getProperty('cache'); + $prop->setAccessible(true); + $cache = $prop->getValue($this->optimizer); + + self::assertArrayHasKey($param->id(), $cache); + + $velocity = $cache[$param->id()]; + + // Verify the velocity is an all-zeros tensor of the correct shape + $zeros = NumPower::zeros($param->param()->shape()); + self::assertEqualsWithDelta($zeros->toArray(), $velocity->toArray(), 0.0); + } + + /** + * @param Parameter $param + * @param NDArray $gradient + * @param list> $expected + */ + #[DataProvider('stepProvider')] + public function testStep(Parameter $param, NDArray $gradient, array $expected) : void + { + $this->optimizer->warm($param); + + $step = $this->optimizer->step(param: $param, gradient: $gradient); + + self::assertEqualsWithDelta($expected, $step->toArray(), 1e-7); + } +} diff --git a/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php b/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php index 456bd54c0..09d43ac24 100644 --- a/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php +++ 
b/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php @@ -110,6 +110,33 @@ public function testToString() : void self::assertEquals('RMS Prop (rate: 0.001, decay: 0.1)', (string) $this->optimizer); } + #[Test] + #[TestDox('Warm initializes a zeroed velocity cache with the parameter\'s shape')] + public function testWarmInitializesZeroedCache() : void + { + $param = new Parameter(NumPower::array([ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ])); + + // Warm the optimizer for this parameter + $this->optimizer->warm($param); + + // Use reflection to read the protected cache + $ref = new \ReflectionClass($this->optimizer); + $prop = $ref->getProperty('cache'); + $prop->setAccessible(true); + $cache = $prop->getValue($this->optimizer); + + self::assertArrayHasKey($param->id(), $cache); + + $velocity = $cache[$param->id()]; + + // Verify the velocity is an all-zeros tensor of the correct shape + $zeros = NumPower::zeros($param->param()->shape()); + self::assertEqualsWithDelta($zeros->toArray(), $velocity->toArray(), 0.0); + } + /** * @param Parameter $param * @param NDArray $gradient From 919ce3629cf005ec20fb9f8a4070e2dc2d1835be Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 8 Nov 2025 18:54:42 +0200 Subject: [PATCH 05/22] 390 convert Cyclical to NumPower --- docs/neural-network/optimizers/cyclical.md | 26 ++- .../Optimizers/Cyclical/Cyclical.php | 166 ++++++++++++++++++ .../Optimizers/Cyclical/CyclicalTest.php | 143 +++++++++++++++ 3 files changed, 332 insertions(+), 3 deletions(-) create mode 100644 src/NeuralNet/Optimizers/Cyclical/Cyclical.php create mode 100644 tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php diff --git a/docs/neural-network/optimizers/cyclical.md b/docs/neural-network/optimizers/cyclical.md index 9773004da..f6d3940db 100644 --- a/docs/neural-network/optimizers/cyclical.md +++ b/docs/neural-network/optimizers/cyclical.md @@ -1,8 +1,28 @@ -[source] +[source] # Cyclical The Cyclical optimizer uses a global learning rate that cycles between the lower and upper bound over a designated period while also decaying the upper bound by a factor at each step. Cyclical learning rates have been shown to help escape bad local minima and saddle points of the gradient. +## Mathematical formulation +Per step (element-wise), the cyclical learning rate and update are computed as: + +$$ +\begin{aligned} +\text{cycle} &= \left\lfloor 1 + \frac{t}{2\,\text{steps}} \right\rfloor \\ +x &= \left| \frac{t}{\text{steps}} - 2\,\text{cycle} + 1 \right| \\ +\text{scale} &= \text{decay}^{\,t} \\ +\eta_t &= \text{lower} + (\text{upper} - \text{lower})\,\max\bigl(0\,1 - x\bigr)\,\text{scale} \\ +\Delta\theta_t &= \eta_t\,g_t +\end{aligned} +$$ + +where: +- `t` is the current step counter, +- `steps` is the number of steps in every half cycle, +- `lower` and `upper` are the learning rate bounds, +- `decay` is the multiplicative decay applied each step, +- `g_t` is the current gradient. + ## Parameters | # | Name | Default | Type | Description | |---|---|---|---|---| @@ -13,10 +33,10 @@ The Cyclical optimizer uses a global learning rate that cycles between the lower ## Example ```php -use Rubix\ML\NeuralNet\Optimizers\Cyclical; +use Rubix\ML\NeuralNet\Optimizers\Cyclical\Cyclical; $optimizer = new Cyclical(0.001, 0.005, 1000); ``` ## References -[^1]: L. N. Smith. (2017). Cyclical Learning Rates for Training Neural Networks. \ No newline at end of file +[^1]: L. N. Smith. (2017). Cyclical Learning Rates for Training Neural Networks. 
diff --git a/src/NeuralNet/Optimizers/Cyclical/Cyclical.php b/src/NeuralNet/Optimizers/Cyclical/Cyclical.php new file mode 100644 index 000000000..ac22d9d52 --- /dev/null +++ b/src/NeuralNet/Optimizers/Cyclical/Cyclical.php @@ -0,0 +1,166 @@ + + */ +class Cyclical implements Optimizer +{ + /** + * The lower bound on the learning rate. + * + * @var float + */ + protected float $lower; + + /** + * The upper bound on the learning rate. + * + * @var float + */ + protected float $upper; + + /** + * The range of the learning rate. + * + * @var float + */ + protected float $range; + + /** + * The number of steps in every cycle. + * + * @var int + */ + protected int $losses; + + /** + * The exponential scaling factor applied to each step as decay. + * + * @var float + */ + protected float $decay; + + /** + * The number of steps taken so far. + * + * @var int + */ + protected int $t = 0; + + /** + * @param float $lower + * @param float $upper + * @param int $losses + * @param float $decay + * @throws InvalidArgumentException + */ + public function __construct( + float $lower = 0.001, + float $upper = 0.006, + int $losses = 2000, + float $decay = 0.99994 + ) { + if ($lower <= 0.0) { + throw new InvalidArgumentException( + "Lower bound must be greater than 0, $lower given." + ); + } + + if ($lower > $upper) { + throw new InvalidArgumentException( + 'Lower bound cannot be reater than the upper bound.' + ); + } + + if ($losses < 1) { + throw new InvalidArgumentException( + "The number of steps per cycle must be greater than 0, $losses given." + ); + } + + if ($decay <= 0.0 or $decay >= 1.0) { + throw new InvalidArgumentException( + "Decay must be between 0 and 1, $decay given." + ); + } + + $this->lower = $lower; + $this->upper = $upper; + $this->range = $upper - $lower; + $this->losses = $losses; + $this->decay = $decay; + } + + /** + * Take a step of gradient descent for a given parameter. + * + * Cyclical learning rate schedule (per-step, element-wise update): + * - Cycle index: cycle = floor(1 + t / (2 · losses)) + * - Triangular position: x = | t / losses − 2 · cycle + 1 | + * - Exponential decay: scale = decay^t + * - Learning rate at t: η_t = lower + (upper − lower) · max(0, 1 − x) · scale + * - Returned step: Δθ_t = η_t · g_t + * + * where: + * - t is the current step counter (incremented after computing η_t), + * - losses is the number of steps per cycle, + * - lower and upper are the learning rate bounds, + * - decay is the multiplicative decay applied each step, + * - g_t is the current gradient. + * + * @internal + * + * @param Parameter $param + * @param NDArray $gradient + * @return NDArray + */ + public function step(Parameter $param, NDArray $gradient) : NDArray + { + $cycle = floor(1 + $this->t / (2 * $this->losses)); + + $x = abs($this->t / $this->losses - 2 * $cycle + 1); + + $scale = $this->decay ** $this->t; + + $rate = $this->lower + $this->range * max(0, 1 - $x) * $scale; + + ++$this->t; + + return NumPower::multiply($gradient, $rate); + } + + /** + * Return the string representation of the object. + * + * @internal + * + * @return string + */ + public function __toString() : string + { + return "Cyclical (lower: {$this->lower}, upper: {$this->upper}," + . 
" steps: {$this->losses}, decay: {$this->decay})"; + } +} diff --git a/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php b/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php new file mode 100644 index 000000000..7d1691fe8 --- /dev/null +++ b/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php @@ -0,0 +1,143 @@ +optimizer = new Cyclical(lower: 0.001, upper: 0.006, losses: 2000); + } + + #[Test] + #[TestDox('Throws exception when constructed with zero lower bound')] + public function testConstructorWithZeroLower() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: 0.0, upper: 0.006, losses: 2000); + } + + #[Test] + #[TestDox('Throws exception when constructed with negative lower bound')] + public function testConstructorWithNegativeLower() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: -0.001, upper: 0.006, losses: 2000); + } + + #[Test] + #[TestDox('Throws exception when lower bound is greater than upper bound')] + public function testConstructorWithLowerGreaterThanUpper() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: 0.01, upper: 0.006, losses: 2000); + } + + #[Test] + #[TestDox('Throws exception when constructed with zero steps per cycle')] + public function testConstructorWithZeroSteps() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: 0.001, upper: 0.006, losses: 0); + } + + #[Test] + #[TestDox('Throws exception when constructed with negative steps per cycle')] + public function testConstructorWithNegativeSteps() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: 0.001, upper: 0.006, losses: -5); + } + + #[Test] + #[TestDox('Throws exception when constructed with zero decay')] + public function testConstructorWithZeroDecay() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: 0.001, upper: 0.006, losses: 2000, decay: 0.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with decay equal to 1')] + public function testConstructorWithDecayEqualToOne() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: 0.001, upper: 0.006, losses: 2000, decay: 1.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with decay greater than 1')] + public function testConstructorWithDecayGreaterThanOne() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: 0.001, upper: 0.006, losses: 2000, decay: 1.5); + } + + #[Test] + #[TestDox('Throws exception when constructed with negative decay')] + public function testConstructorWithNegativeDecay() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: 0.001, upper: 0.006, losses: 2000, decay: -0.1); + } + + #[Test] + #[TestDox('Can be cast to a string')] + public function testToString() : void + { + self::assertEquals('Cyclical (lower: 0.001, upper: 0.006, steps: 2000, decay: 0.99994)', (string) $this->optimizer); + } + + /** + * @param Parameter $param + * @param NDArray $gradient + * @param list> $expected + */ + #[DataProvider('stepProvider')] + public function testStep(Parameter $param, NDArray $gradient, array $expected) : void + { + $step = $this->optimizer->step(param: $param, gradient: $gradient); + + self::assertEqualsWithDelta($expected, $step->toArray(), 1e-7); + } +} From d806494ef7db5ca520d1c121bda49f1d63724fb4 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan 
Date: Sat, 8 Nov 2025 18:59:59 +0200 Subject: [PATCH 06/22] 390 added math formulas to momentum.md --- docs/neural-network/optimizers/momentum.md | 27 +++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/docs/neural-network/optimizers/momentum.md b/docs/neural-network/optimizers/momentum.md index 017cf0efa..f949a4115 100644 --- a/docs/neural-network/optimizers/momentum.md +++ b/docs/neural-network/optimizers/momentum.md @@ -1,8 +1,33 @@ -[source] +[source] # Momentum Momentum accelerates each update step by accumulating velocity from past updates and adding a factor of the previous velocity to the current step. Momentum can help speed up training and escape bad local minima when compared with [Stochastic](stochastic.md) Gradient Descent. +## Mathematical formulation +Per step (element-wise), Momentum updates the velocity and applies it as the parameter step: + +$$ +\begin{aligned} +\beta &= 1 - \text{decay}, \quad \eta = \text{rate} \\ +\text{Velocity update:}\quad v_t &= \beta\,v_{t-1} + \eta\,g_t \\ +\text{Returned step:}\quad \Delta\theta_t &= v_t +\end{aligned} +$$ + +Nesterov lookahead (when `lookahead = true`) is approximated by applying the velocity update a second time: + +$$ +\begin{aligned} +v_t &\leftarrow \beta\,v_t + \eta\,g_t +\end{aligned} +$$ + +where: +- `g_t` is the current gradient, +- `v_t` is the velocity (accumulated update), +- `\beta` is the momentum coefficient (`1 − decay`), +- `\eta` is the learning rate (`rate`). + ## Parameters | # | Name | Default | Type | Description | |---|---|---|---|---| From 3fa08ec89e9132a43c5ab4e34319404ce8f140d9 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 8 Nov 2025 19:10:18 +0200 Subject: [PATCH 07/22] 390 added math formulas to rms-prop.md --- docs/neural-network/optimizers/cyclical.md | 10 +++++----- docs/neural-network/optimizers/momentum.md | 8 ++++---- docs/neural-network/optimizers/rms-prop.md | 22 ++++++++++++++++++++-- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/docs/neural-network/optimizers/cyclical.md b/docs/neural-network/optimizers/cyclical.md index f6d3940db..eed8b2779 100644 --- a/docs/neural-network/optimizers/cyclical.md +++ b/docs/neural-network/optimizers/cyclical.md @@ -17,11 +17,11 @@ x &= \left| \frac{t}{\text{steps}} - 2\,\text{cycle} + 1 \right| \\ $$ where: -- `t` is the current step counter, -- `steps` is the number of steps in every half cycle, -- `lower` and `upper` are the learning rate bounds, -- `decay` is the multiplicative decay applied each step, -- `g_t` is the current gradient. +- $t$ is the current step counter, +- $steps$ is the number of steps in every half cycle, +- $lower$ and $upper$ are the learning rate bounds, +- $decay$ is the multiplicative decay applied each step, +- $g_t$ is the current gradient. ## Parameters | # | Name | Default | Type | Description | diff --git a/docs/neural-network/optimizers/momentum.md b/docs/neural-network/optimizers/momentum.md index f949a4115..e9c787a2f 100644 --- a/docs/neural-network/optimizers/momentum.md +++ b/docs/neural-network/optimizers/momentum.md @@ -23,10 +23,10 @@ v_t &\leftarrow \beta\,v_t + \eta\,g_t $$ where: -- `g_t` is the current gradient, -- `v_t` is the velocity (accumulated update), -- `\beta` is the momentum coefficient (`1 − decay`), -- `\eta` is the learning rate (`rate`). +- $g_t$ is the current gradient, +- $v_t$ is the velocity (accumulated update), +- $\beta$ is the momentum coefficient ($1 − decay$), +- $\eta$ is the learning rate ($rate$). 
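+
+For example, with `rate = 0.001` and `decay = 0.1` (so `β = 0.9`), a constant gradient element `g = 1` gives `v_1 = 0.001`, `v_2 = 0.0019`, `v_3 = 0.00271`, and so on, approaching `η·g / decay = 0.01`, ten times the plain SGD step of `η·g = 0.001`.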
## Parameters | # | Name | Default | Type | Description | diff --git a/docs/neural-network/optimizers/rms-prop.md b/docs/neural-network/optimizers/rms-prop.md index ae6b847bc..c531a863e 100644 --- a/docs/neural-network/optimizers/rms-prop.md +++ b/docs/neural-network/optimizers/rms-prop.md @@ -1,7 +1,25 @@ -[source] +[source] # RMS Prop -An adaptive gradient technique that divides the current gradient over a rolling window of the magnitudes of recent gradients. Unlike [AdaGrad](adagrad.md), RMS Prop does not suffer from an infinitely decaying step size. +An adaptive gradient technique that divides the current gradient over a rolling window of magnitudes of recent gradients. Unlike [AdaGrad](adagrad.md), RMS Prop does not suffer from an infinitely decaying step size. + +## Mathematical formulation +Per step (element-wise), RMSProp maintains a running average of squared gradients and scales the step by the root-mean-square: + +$$ +\begin{aligned} +\rho &= 1 - \text{decay}, \quad \eta = \text{rate} \\ +\text{Running average:}\quad v_t &= \rho\,v_{t-1} + (1 - \rho)\,g_t^{\,2} \\ +\text{Returned step:}\quad \Delta\theta_t &= \frac{\eta\,g_t}{\max\bigl(\sqrt{v_t},\,\varepsilon\bigr)} +\end{aligned} +$$ + +where: +- $g_t$ - is the current gradient, +- $v_t$ - is the running average of squared gradients, +- $\rho$ - is the averaging coefficient ($1 − decay$), +- $\eta$ - is the learning rate ($rate$), +- $\varepsilon$ - is a small constant to avoid division by zero (implemented by clipping $\sqrt{v_t}$ to $[ε, +∞)$). ## Parameters | # | Name | Default | Type | Description | From 537b586d807fae161b3c6f7760d3a495e78cce52 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 8 Nov 2025 19:19:08 +0200 Subject: [PATCH 08/22] 390 added math formulas to stochastic.md --- docs/neural-network/optimizers/step-decay.md | 20 ++++++++++++++++++- docs/neural-network/optimizers/stochastic.md | 14 +++++++++++++ .../Optimizers/Stochastic/Stochastic.php | 4 +++- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/docs/neural-network/optimizers/step-decay.md b/docs/neural-network/optimizers/step-decay.md index 0ec9395cc..f5da99c8b 100644 --- a/docs/neural-network/optimizers/step-decay.md +++ b/docs/neural-network/optimizers/step-decay.md @@ -1,8 +1,26 @@ -[source] +[source] # Step Decay A learning rate decay optimizer that reduces the global learning rate by a factor whenever it reaches a new *floor*. The number of steps needed to reach a new floor is defined by the *steps* hyper-parameter. +## Mathematical formulation +Per step (element-wise), the Step Decay learning rate and update are: + +$$ +\begin{aligned} +\text{floor} &= \left\lfloor \frac{t}{k} \right\rfloor \\ +\eta_t &= \frac{\eta_0}{1 + \text{floor}\cdot \lambda} \\ +\Delta\theta_t &= \eta_t\,g_t +\end{aligned} +$$ + +where: +- $t$ is the current step number, +- $k$ is the number of steps per floor, +- $\eta_0$ is the initial learning rate ($rate$), +- $\lambda$ is the decay factor ($decay$), +- $g_t$ is the current gradient. + ## Parameters | # | Name | Default | Type | Description | |---|---|---|---|---| diff --git a/docs/neural-network/optimizers/stochastic.md b/docs/neural-network/optimizers/stochastic.md index 4422e0ddc..bb0096b87 100644 --- a/docs/neural-network/optimizers/stochastic.md +++ b/docs/neural-network/optimizers/stochastic.md @@ -3,6 +3,20 @@ # Stochastic A constant learning rate optimizer based on vanilla Stochastic Gradient Descent (SGD). 
+## Mathematical formulation +Per step (element-wise), the SGD update scales the gradient by a constant learning rate: + +$$ +\begin{aligned} +\eta &= \text{rate} \\ +\Delta\theta_t &= \eta\,g_t +\end{aligned} +$$ + +where: +- $g_t$ is the current gradient, +- $\eta$ is the learning rate ($rate$). + ## Parameters | # | Name | Default | Type | Description | |---|---|---|---|---| diff --git a/src/NeuralNet/Optimizers/Stochastic/Stochastic.php b/src/NeuralNet/Optimizers/Stochastic/Stochastic.php index 004489a78..b2cd6ebac 100644 --- a/src/NeuralNet/Optimizers/Stochastic/Stochastic.php +++ b/src/NeuralNet/Optimizers/Stochastic/Stochastic.php @@ -35,7 +35,9 @@ class Stochastic implements Optimizer public function __construct(float $rate = 0.01) { if ($rate <= 0.0) { - throw new InvalidArgumentException("Learning rate must be greater than 0, $rate given."); + throw new InvalidArgumentException( + "Learning rate must be greater than 0, $rate given." + ); } $this->rate = $rate; From 331fb3639329c9c021e1a783ea20d8df59815f43 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Tue, 11 Nov 2025 16:59:27 +0200 Subject: [PATCH 09/22] 390 convert Adam to NumPower --- docs/neural-network/optimizers/adam.md | 27 ++- src/NeuralNet/Optimizers/Adam/Adam.php | 181 +++++++++++++++++++ tests/NeuralNet/Optimizers/Adam/AdamTest.php | 101 +++++++++++ 3 files changed, 306 insertions(+), 3 deletions(-) create mode 100644 src/NeuralNet/Optimizers/Adam/Adam.php create mode 100644 tests/NeuralNet/Optimizers/Adam/AdamTest.php diff --git a/docs/neural-network/optimizers/adam.md b/docs/neural-network/optimizers/adam.md index 3b9898649..0470a9d4a 100644 --- a/docs/neural-network/optimizers/adam.md +++ b/docs/neural-network/optimizers/adam.md @@ -1,8 +1,29 @@ -[source] +[source] # Adam Short for *Adaptive Moment Estimation*, the Adam Optimizer combines both Momentum and RMS properties. In addition to storing an exponentially decaying average of past squared gradients like [RMSprop](rms-prop.md), Adam also keeps an exponentially decaying average of past gradients, similar to [Momentum](momentum.md). Whereas Momentum can be seen as a ball running down a slope, Adam behaves like a heavy ball with friction. +## Mathematical formulation +Per step (element-wise), Adam maintains exponentially decaying moving averages of the gradient and its element-wise square and uses them to scale the update: + +$$ +\begin{aligned} +\mathbf{v}_t &= (1 - \beta_1)\,\mathbf{v}_{t-1} + \beta_1\,\mathbf{g}_t \\ +\mathbf{n}_t &= (1 - \beta_2)\,\mathbf{n}_{t-1} + \beta_2\,\mathbf{g}_t^{2} \\ +\Delta{\theta}_t &= \alpha\, \frac{\mathbf{v}_t}{\sqrt{\mathbf{n}_t} + \varepsilon} +\end{aligned} +$$ + +where: +- $t$ is the current step, +- $\alpha$ is the learning rate (`rate`), +- $\beta_1$ is the momentum decay (`momentumDecay`), +- $\beta_2$ is the norm decay (`normDecay`), +- $\mathbf{g}_t$ is the current gradient, and $\mathbf{g}_t^{2}$ denotes element-wise square, +- $\varepsilon$ is a small constant for numerical stability (in the implementation, the denominator is clipped from below by `EPSILON`). + +Note: This formulation follows the implementation in Rubix ML and does not include bias-correction terms. 
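+
+For example, on the first step with rate $\alpha = 0.001$, $\beta_1 = 0.1$ and $\beta_2 = 0.001$, a gradient element $g$ gives $\mathbf{v}_1 = 0.1\,g$ and $\mathbf{n}_1 = 0.001\,g^{2}$, so the returned step is $\alpha \cdot 0.1\,g / \sqrt{0.001\,g^{2}} \approx 0.00316 \cdot \operatorname{sign}(g)$, independent of the magnitude of $g$ (provided $\sqrt{\mathbf{n}_1}$ exceeds $\varepsilon$).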
+ ## Parameters | # | Name | Default | Type | Description | |---|---|---|---|---| @@ -12,10 +33,10 @@ Short for *Adaptive Moment Estimation*, the Adam Optimizer combines both Momentu ## Example ```php -use Rubix\ML\NeuralNet\Optimizers\Adam; +use Rubix\ML\NeuralNet\Optimizers\Adam\Adam; $optimizer = new Adam(0.0001, 0.1, 0.001); ``` ## References -[^1]: D. P. Kingma et al. (2014). Adam: A Method for Stochastic Optimization. \ No newline at end of file +[^1]: D. P. Kingma et al. (2014). Adam: A Method for Stochastic Optimization. diff --git a/src/NeuralNet/Optimizers/Adam/Adam.php b/src/NeuralNet/Optimizers/Adam/Adam.php new file mode 100644 index 000000000..fad8ac1bf --- /dev/null +++ b/src/NeuralNet/Optimizers/Adam/Adam.php @@ -0,0 +1,181 @@ + + */ +class Adam implements Optimizer, Adaptive +{ + /** + * The learning rate that controls the global step size. + * + * @var float + */ + protected float $rate; + + /** + * The momentum decay rate. + * + * @var float + */ + protected float $momentumDecay; + + /** + * The decay rate of the previous norms. + * + * @var float + */ + protected float $normDecay; + + /** + * The parameter cache of running velocity and squared gradients. + * + * @var array{0: NDArray, 1: NDArray}[] + */ + protected array $cache = [ + // id => [velocity, norm] + ]; + + /** + * @param float $rate + * @param float $momentumDecay + * @param float $normDecay + * @throws InvalidArgumentException + */ + public function __construct(float $rate = 0.001, float $momentumDecay = 0.1, float $normDecay = 0.001) + { + if ($rate <= 0.0) { + throw new InvalidArgumentException( + "Learning rate must be greater than 0, $rate given." + ); + } + + if ($momentumDecay <= 0.0 or $momentumDecay >= 1.0) { + throw new InvalidArgumentException( + "Momentum decay must be between 0 and 1, $momentumDecay given." + ); + } + + if ($normDecay <= 0.0 or $normDecay >= 1.0) { + throw new InvalidArgumentException( + "Norm decay must be between 0 and 1, $normDecay given." + ); + } + + $this->rate = $rate; + $this->momentumDecay = $momentumDecay; + $this->normDecay = $normDecay; + } + + /** + * Warm the parameter cache. + * + * @internal + * + * @param Parameter $param + * @throws RuntimeException + */ + public function warm(Parameter $param) : void + { + $class = get_class($param->param()); + + if (!$class) { + throw new RuntimeException('Could not locate parameter class.'); + } + + $zeros = NumPower::zeros($param->param()->shape()); + + $this->cache[$param->id()] = [clone $zeros, $zeros]; + } + + /** + * Take a step of gradient descent for a given parameter. + * + * Adam update (element-wise): + * v_t = v_{t-1} + β1 · (g_t − v_{t-1}) // exponential moving average of gradients + * n_t = n_{t-1} + β2 · (g_t^2 − n_{t-1}) // exponential moving average of squared gradients + * Δθ_t = η · v_t / max(√n_t, ε) + * + * where: + * - g_t is the current gradient, + * - v_t is the running average of gradients ("velocity"), β1 = momentumDecay, + * - n_t is the running average of squared gradients ("norm"), β2 = normDecay, + * - η is the learning rate (rate), ε is a small constant to avoid division by zero (implemented by clipping √n_t to [ε, +∞)). 
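+     *
+     * Note: following the formulation above, no bias correction is applied to v_t or n_t.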
+ * + * @internal + * + * @param Parameter $param + * @param NDArray $gradient + * @return NDArray + */ + public function step(Parameter $param, NDArray $gradient) : NDArray + { + [$velocity, $norm] = $this->cache[$param->id()]; + + $vHat = NumPower::multiply( + NumPower::subtract($gradient, $velocity), + $this->momentumDecay + ); + + $velocity = NumPower::add($velocity, $vHat); + + $nHat = NumPower::multiply( + NumPower::subtract(NumPower::square($gradient), $norm), + $this->normDecay + ); + + $norm = NumPower::add($norm, $nHat); + + $this->cache[$param->id()] = [$velocity, $norm]; + + $denominator = NumPower::sqrt($norm); + $denominator = NumPower::clip($denominator, EPSILON, PHP_FLOAT_MAX); + + return NumPower::divide( + NumPower::multiply($velocity, $this->rate), + $denominator + ); + } + + /** + * Return the string representation of the object. + * + * @internal + * + * @return string + */ + public function __toString() : string + { + return "Adam (rate: {$this->rate}, momentum decay: {$this->momentumDecay}," + . " norm decay: {$this->normDecay})"; + } +} diff --git a/tests/NeuralNet/Optimizers/Adam/AdamTest.php b/tests/NeuralNet/Optimizers/Adam/AdamTest.php new file mode 100644 index 000000000..dbd6cedd6 --- /dev/null +++ b/tests/NeuralNet/Optimizers/Adam/AdamTest.php @@ -0,0 +1,101 @@ += 1) + yield [0.001, 0.0, 0.001]; + yield [0.001, -0.1, 0.001]; + yield [0.001, 1.0, 0.001]; + yield [0.001, 1.1, 0.001]; + + // Invalid normDecay (<= 0 or >= 1) + yield [0.001, 0.1, 0.0]; + yield [0.001, 0.1, -0.1]; + yield [0.001, 0.1, 1.0]; + yield [0.001, 0.1, 1.1]; + } + + public static function stepProvider() : Generator + { + yield [ + new Parameter(NumPower::array([ + [0.1, 0.6, -0.4], + [0.5, 0.6, -0.4], + [0.1, 0.1, -0.7], + ])), + NumPower::array([ + [0.01, 0.05, -0.02], + [-0.01, 0.02, 0.03], + [0.04, -0.01, -0.5], + ]), + [ + [0.0031622, 0.0031622, -0.0031622], + [-0.0031622, 0.0031622, 0.0031622], + [0.0031622, -0.0031622, -0.0031622], + ], + ]; + } + + protected function setUp() : void + { + $this->optimizer = new Adam( + rate: 0.001, + momentumDecay: 0.1, + normDecay: 0.001 + ); + } + + public function testToString() : void + { + $expected = 'Adam (rate: 0.001, momentum decay: 0.1, norm decay: 0.001)'; + self::assertSame($expected, (string) $this->optimizer); + } + + #[DataProvider('invalidConstructorProvider')] + public function testInvalidConstructorParams(float $rate, float $momentumDecay, float $normDecay) : void + { + $this->expectException(InvalidArgumentException::class); + new Adam(rate: $rate, momentumDecay: $momentumDecay, normDecay: $normDecay); + } + + /** + * @param Parameter $param + * @param NDArray $gradient + * @param list> $expected + */ + #[DataProvider('stepProvider')] + public function testStep(Parameter $param, NDArray $gradient, array $expected) : void + { + $this->optimizer->warm($param); + + $step = $this->optimizer->step(param: $param, gradient: $gradient); + + self::assertEqualsWithDelta($expected, $step->toArray(), 1e-7); + } +} From 47ad66599d0a102df19a1dd8eda120a80d8e18c4 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Tue, 11 Nov 2025 17:02:17 +0200 Subject: [PATCH 10/22] 390 refactoring CyclicalTest - added dataprovider for constructor tests --- .../Optimizers/Cyclical/CyclicalTest.php | 100 ++++++------------ 1 file changed, 31 insertions(+), 69 deletions(-) diff --git a/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php b/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php index 7d1691fe8..5bab9a6c1 100644 --- 
a/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php +++ b/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php @@ -23,6 +23,19 @@ class CyclicalTest extends TestCase { protected Cyclical $optimizer; + public static function invalidConstructorProvider() : Generator + { + yield 'zero lower' => [0.0, 0.006, 2000, null]; + yield 'negative lower' => [-0.001, 0.006, 2000, null]; + yield 'lower > upper' => [0.01, 0.006, 2000, null]; + yield 'zero steps' => [0.001, 0.006, 0, null]; + yield 'negative steps' => [0.001, 0.006, -5, null]; + yield 'zero decay' => [0.001, 0.006, 2000, 0.0]; + yield 'decay == 1' => [0.001, 0.006, 2000, 1.0]; + yield 'decay > 1' => [0.001, 0.006, 2000, 1.5]; + yield 'negative decay' => [0.001, 0.006, 2000, -0.1]; + } + public static function stepProvider() : Generator { yield [ @@ -50,82 +63,31 @@ protected function setUp() : void } #[Test] - #[TestDox('Throws exception when constructed with zero lower bound')] - public function testConstructorWithZeroLower() : void - { - $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: 0.0, upper: 0.006, losses: 2000); - } - - #[Test] - #[TestDox('Throws exception when constructed with negative lower bound')] - public function testConstructorWithNegativeLower() : void - { - $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: -0.001, upper: 0.006, losses: 2000); - } - - #[Test] - #[TestDox('Throws exception when lower bound is greater than upper bound')] - public function testConstructorWithLowerGreaterThanUpper() : void - { - $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: 0.01, upper: 0.006, losses: 2000); - } - - #[Test] - #[TestDox('Throws exception when constructed with zero steps per cycle')] - public function testConstructorWithZeroSteps() : void - { - $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: 0.001, upper: 0.006, losses: 0); - } - - #[Test] - #[TestDox('Throws exception when constructed with negative steps per cycle')] - public function testConstructorWithNegativeSteps() : void - { - $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: 0.001, upper: 0.006, losses: -5); - } - - #[Test] - #[TestDox('Throws exception when constructed with zero decay')] - public function testConstructorWithZeroDecay() : void - { - $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: 0.001, upper: 0.006, losses: 2000, decay: 0.0); - } - - #[Test] - #[TestDox('Throws exception when constructed with decay equal to 1')] - public function testConstructorWithDecayEqualToOne() : void - { - $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: 0.001, upper: 0.006, losses: 2000, decay: 1.0); - } - - #[Test] - #[TestDox('Throws exception when constructed with decay greater than 1')] - public function testConstructorWithDecayGreaterThanOne() : void + #[TestDox('Can be cast to a string')] + public function testToString() : void { - $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: 0.001, upper: 0.006, losses: 2000, decay: 1.5); + self::assertEquals('Cyclical (lower: 0.001, upper: 0.006, steps: 2000, decay: 0.99994)', (string) $this->optimizer); } + /** + * @param float $lower + * @param float $upper + * @param int $losses + * @param float|null $decay + * @return void + */ #[Test] - #[TestDox('Throws exception when constructed with negative decay')] - public function testConstructorWithNegativeDecay() : void + 
#[DataProvider('invalidConstructorProvider')] + #[TestDox('Throws exception when constructed with invalid arguments')] + public function testConstructorInvalidArgs(float $lower, float $upper, int $losses, ?float $decay) : void { $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: 0.001, upper: 0.006, losses: 2000, decay: -0.1); - } - #[Test] - #[TestDox('Can be cast to a string')] - public function testToString() : void - { - self::assertEquals('Cyclical (lower: 0.001, upper: 0.006, steps: 2000, decay: 0.99994)', (string) $this->optimizer); + if ($decay === null) { + new Cyclical(lower: $lower, upper: $upper, losses: $losses); + } else { + new Cyclical(lower: $lower, upper: $upper, losses: $losses, decay: $decay); + } } /** From 3575565b042faa80eae294374479cecf9315e652 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Tue, 11 Nov 2025 17:05:06 +0200 Subject: [PATCH 11/22] 390 refactoring CyclicalTest - added dataprovider for constructor tests --- tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php b/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php index 5bab9a6c1..aa7102f0f 100644 --- a/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php +++ b/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php @@ -95,7 +95,9 @@ public function testConstructorInvalidArgs(float $lower, float $upper, int $loss * @param NDArray $gradient * @param list> $expected */ + #[Test] #[DataProvider('stepProvider')] + #[TestDox('Can compute the step')] public function testStep(Parameter $param, NDArray $gradient, array $expected) : void { $step = $this->optimizer->step(param: $param, gradient: $gradient); From 8677c7670eaefb9add6b8d4d8c90bdce92239c4f Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Tue, 11 Nov 2025 17:05:15 +0200 Subject: [PATCH 12/22] 390 refactoring AdamTest - added dataprovider for constructor tests --- tests/NeuralNet/Optimizers/Adam/AdamTest.php | 34 ++++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/tests/NeuralNet/Optimizers/Adam/AdamTest.php b/tests/NeuralNet/Optimizers/Adam/AdamTest.php index dbd6cedd6..04444001d 100644 --- a/tests/NeuralNet/Optimizers/Adam/AdamTest.php +++ b/tests/NeuralNet/Optimizers/Adam/AdamTest.php @@ -9,6 +9,8 @@ use PHPUnit\Framework\Attributes\Group; use NDArray; use NumPower; +use PHPUnit\Framework\Attributes\Test; +use PHPUnit\Framework\Attributes\TestDox; use Rubix\ML\Exceptions\InvalidArgumentException; use Rubix\ML\NeuralNet\Parameters\Parameter; use Rubix\ML\NeuralNet\Optimizers\Adam\Adam; @@ -25,20 +27,20 @@ class AdamTest extends TestCase public static function invalidConstructorProvider() : Generator { // Invalid rates (<= 0) - yield [0.0, 0.1, 0.001]; - yield [-0.5, 0.1, 0.001]; + yield 'zero rate' => [0.0, 0.1, 0.001]; + yield 'negative rate' => [-0.5, 0.1, 0.001]; // Invalid momentumDecay (<= 0 or >= 1) - yield [0.001, 0.0, 0.001]; - yield [0.001, -0.1, 0.001]; - yield [0.001, 1.0, 0.001]; - yield [0.001, 1.1, 0.001]; + yield 'zero momentumDecay' => [0.001, 0.0, 0.001]; + yield 'negative momentumDecay' => [0.001, -0.1, 0.001]; + yield 'momentumDecay == 1' => [0.001, 1.0, 0.001]; + yield 'momentumDecay > 1' => [0.001, 1.1, 0.001]; // Invalid normDecay (<= 0 or >= 1) - yield [0.001, 0.1, 0.0]; - yield [0.001, 0.1, -0.1]; - yield [0.001, 0.1, 1.0]; - yield [0.001, 0.1, 1.1]; + yield 'zero normDecay' => [0.001, 0.1, 0.0]; + yield 'negative normDecay' => [0.001, 0.1, -0.1]; + yield 
'normDecay == 1' => [0.001, 0.1, 1.0]; + yield 'normDecay > 1' => [0.001, 0.1, 1.1]; } public static function stepProvider() : Generator @@ -71,13 +73,23 @@ protected function setUp() : void ); } + #[Test] + #[TestDox('Can be cast to a string')] public function testToString() : void { $expected = 'Adam (rate: 0.001, momentum decay: 0.1, norm decay: 0.001)'; self::assertSame($expected, (string) $this->optimizer); } + /** + * @param float $rate + * @param float $momentumDecay + * @param float $normDecay + * @return void + */ + #[Test] #[DataProvider('invalidConstructorProvider')] + #[TestDox('Throws exception when constructed with invalid arguments')] public function testInvalidConstructorParams(float $rate, float $momentumDecay, float $normDecay) : void { $this->expectException(InvalidArgumentException::class); @@ -89,7 +101,9 @@ public function testInvalidConstructorParams(float $rate, float $momentumDecay, * @param NDArray $gradient * @param list> $expected */ + #[Test] #[DataProvider('stepProvider')] + #[TestDox('Can compute the step')] public function testStep(Parameter $param, NDArray $gradient, array $expected) : void { $this->optimizer->warm($param); From 269405beeb16678e1a676213bc848f437aaec180 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Tue, 11 Nov 2025 17:07:48 +0200 Subject: [PATCH 13/22] 390 refactoring MomentumTest - added dataprovider for constructor tests --- .../Optimizers/Momentum/MomentumTest.php | 74 ++++++------------- 1 file changed, 23 insertions(+), 51 deletions(-) diff --git a/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php b/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php index 1b2a90378..fb84d6d5c 100644 --- a/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php +++ b/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php @@ -23,6 +23,16 @@ class MomentumTest extends TestCase { protected Momentum $optimizer; + public static function invalidConstructorProvider() : Generator + { + yield 'zero rate' => [0.0, 0.1]; + yield 'negative rate' => [-0.001, 0.1]; + yield 'zero decay' => [0.001, 0.0]; + yield 'decay == 1' => [0.001, 1.0]; + yield 'decay > 1' => [0.001, 1.5]; + yield 'negative decay' => [0.001, -0.1]; + } + public static function stepProvider() : Generator { yield [ @@ -50,64 +60,25 @@ protected function setUp() : void } #[Test] - #[TestDox('Throws exception when constructed with zero rate')] - public function testConstructorWithZeroRate() : void - { - $this->expectException(InvalidArgumentException::class); - - new Momentum(rate: 0.0); - } - - #[Test] - #[TestDox('Throws exception when constructed with negative rate')] - public function testConstructorWithNegativeRate() : void - { - $this->expectException(InvalidArgumentException::class); - - new Momentum(rate: -0.001); - } - - #[Test] - #[TestDox('Throws exception when constructed with zero decay')] - public function testConstructorWithZeroDecay() : void - { - $this->expectException(InvalidArgumentException::class); - - new Momentum(rate: 0.001, decay: 0.0); - } - - #[Test] - #[TestDox('Throws exception when constructed with decay equal to 1')] - public function testConstructorWithDecayEqualToOne() : void - { - $this->expectException(InvalidArgumentException::class); - - new Momentum(rate: 0.001, decay: 1.0); - } - - #[Test] - #[TestDox('Throws exception when constructed with decay greater than 1')] - public function testConstructorWithDecayGreaterThanOne() : void + #[TestDox('Can be cast to a string')] + public function testToString() : void { - 
$this->expectException(InvalidArgumentException::class); - - new Momentum(rate: 0.001, decay: 1.5); + self::assertEquals('Momentum (rate: 0.001, decay: 0.1, lookahead: false)', (string) $this->optimizer); } + /** + * @param float $rate + * @param float $decay + * @return void + */ #[Test] - #[TestDox('Throws exception when constructed with negative decay')] - public function testConstructorWithNegativeDecay() : void + #[DataProvider('invalidConstructorProvider')] + #[TestDox('Throws exception when constructed with invalid arguments')] + public function testInvalidConstructorParams(float $rate, float $decay) : void { $this->expectException(InvalidArgumentException::class); - new Momentum(rate: 0.001, decay: -0.1); - } - - #[Test] - #[TestDox('Can be cast to a string')] - public function testToString() : void - { - self::assertEquals('Momentum (rate: 0.001, decay: 0.1, lookahead: false)', (string) $this->optimizer); + new Momentum(rate: $rate, decay: $decay); } #[Test] @@ -143,6 +114,7 @@ public function testWarmInitializesZeroedCache() : void * @param list> $expected */ #[DataProvider('stepProvider')] + #[TestDox('Can compute the step')] public function testStep(Parameter $param, NDArray $gradient, array $expected) : void { $this->optimizer->warm($param); From aca753eeaf5420b656ea8631061cb2e5f437cb56 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Tue, 11 Nov 2025 17:09:57 +0200 Subject: [PATCH 14/22] 390 refactoring RMSPropTest - added dataprovider for constructor tests --- .../Optimizers/Momentum/MomentumTest.php | 1 + .../Optimizers/RMSProp/RMSPropTest.php | 70 +++++-------------- 2 files changed, 20 insertions(+), 51 deletions(-) diff --git a/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php b/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php index fb84d6d5c..1b486efa5 100644 --- a/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php +++ b/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php @@ -113,6 +113,7 @@ public function testWarmInitializesZeroedCache() : void * @param NDArray $gradient * @param list> $expected */ + #[Test] #[DataProvider('stepProvider')] #[TestDox('Can compute the step')] public function testStep(Parameter $param, NDArray $gradient, array $expected) : void diff --git a/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php b/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php index 09d43ac24..f47e4f2b3 100644 --- a/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php +++ b/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php @@ -23,6 +23,16 @@ class RMSPropTest extends TestCase { protected RMSProp $optimizer; + public static function invalidConstructorProvider() : Generator + { + yield 'zero rate' => [0.0, 0.1]; + yield 'negative rate' => [-0.001, 0.1]; + yield 'zero decay' => [0.001, 0.0]; + yield 'decay == 1' => [0.001, 1.0]; + yield 'decay > 1' => [0.001, 1.5]; + yield 'negative decay' => [0.001, -0.1]; + } + public static function stepProvider() : Generator { yield [ @@ -50,64 +60,20 @@ protected function setUp() : void } #[Test] - #[TestDox('Throws exception when constructed with zero rate')] - public function testConstructorWithZeroRate() : void - { - $this->expectException(InvalidArgumentException::class); - - new RMSProp(rate: 0.0); - } - - #[Test] - #[TestDox('Throws exception when constructed with negative rate')] - public function testConstructorWithNegativeRate() : void - { - $this->expectException(InvalidArgumentException::class); - - new RMSProp(rate: -0.001); - } - - #[Test] - #[TestDox('Throws exception when constructed with zero decay')] - 
public function testConstructorWithZeroDecay() : void - { - $this->expectException(InvalidArgumentException::class); - - new RMSProp(rate: 0.001, decay: 0.0); - } - - #[Test] - #[TestDox('Throws exception when constructed with decay equal to 1')] - public function testConstructorWithDecayEqualToOne() : void - { - $this->expectException(InvalidArgumentException::class); - - new RMSProp(rate: 0.001, decay: 1.0); - } - - #[Test] - #[TestDox('Throws exception when constructed with decay greater than 1')] - public function testConstructorWithDecayGreaterThanOne() : void + #[TestDox('Can be cast to a string')] + public function testToString() : void { - $this->expectException(InvalidArgumentException::class); - - new RMSProp(rate: 0.001, decay: 1.5); + self::assertEquals('RMS Prop (rate: 0.001, decay: 0.1)', (string) $this->optimizer); } #[Test] - #[TestDox('Throws exception when constructed with negative decay')] - public function testConstructorWithNegativeDecay() : void + #[DataProvider('invalidConstructorProvider')] + #[TestDox('Throws exception when constructed with invalid arguments')] + public function testInvalidConstructorParams(float $rate, float $decay) : void { $this->expectException(InvalidArgumentException::class); - new RMSProp(rate: 0.001, decay: -0.1); - } - - #[Test] - #[TestDox('Can be cast to a string')] - public function testToString() : void - { - self::assertEquals('RMS Prop (rate: 0.001, decay: 0.1)', (string) $this->optimizer); + new RMSProp(rate: $rate, decay: $decay); } #[Test] @@ -142,7 +108,9 @@ public function testWarmInitializesZeroedCache() : void * @param NDArray $gradient * @param list> $expected */ + #[Test] #[DataProvider('stepProvider')] + #[TestDox('Can compute the step')] public function testStep(Parameter $param, NDArray $gradient, array $expected) : void { $this->optimizer->warm($param); From e9c48315a21f9cb66160ab1f3a8b3059db1e19cc Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Tue, 11 Nov 2025 17:12:11 +0200 Subject: [PATCH 15/22] 390 refactoring StepDecayTest - added dataprovider for constructor tests --- .../Optimizers/StepDecay/StepDecayTest.php | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php b/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php index f3535552b..7d581e31b 100644 --- a/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php +++ b/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php @@ -24,6 +24,15 @@ class StepDecayTest extends TestCase { protected StepDecay $optimizer; + public static function invalidConstructorProvider() : Generator + { + yield 'zero rate' => [0.0, 100, 0.001]; + yield 'negative rate' => [-0.001, 100, 0.001]; + yield 'zero losses' => [0.01, 0, 0.001]; + yield 'negative losses' => [0.01, -5, 0.001]; + yield 'negative decay' => [0.01, 100, -0.1]; + } + public static function stepProvider() : Generator { yield [ @@ -51,37 +60,26 @@ protected function setUp() : void } #[Test] - #[TestDox('Throws exception when constructed with invalid learning rate')] - public function testConstructorWithInvalidRate() : void - { - $this->expectException(InvalidArgumentException::class); - - new StepDecay(rate: 0.0); - } - - #[Test] - #[TestDox('Throws exception when constructed with invalid losses')] - public function testConstructorWithInvalidLosses() : void + #[TestDox('Can be cast to a string')] + public function testToString() : void { - $this->expectException(InvalidArgumentException::class); - - new StepDecay(rate: 0.01, losses: 
0); + self::assertEquals('Step Decay (rate: 0.001, steps: 100, decay: 0.001)', (string) $this->optimizer); } + /** + * @param float $rate + * @param int $losses + * @param float $decay + * @return void + */ #[Test] - #[TestDox('Throws exception when constructed with invalid decay')] - public function testConstructorWithInvalidDecay() : void + #[DataProvider('invalidConstructorProvider')] + #[TestDox('Throws exception when constructed with invalid arguments')] + public function testInvalidConstructorParams(float $rate, int $losses, float $decay) : void { $this->expectException(InvalidArgumentException::class); - new StepDecay(rate: 0.01, losses: 100, decay: -0.1); - } - - #[Test] - #[TestDox('Can be cast to a string')] - public function testToString() : void - { - self::assertEquals('Step Decay (rate: 0.001, steps: 100, decay: 0.001)', (string) $this->optimizer); + new StepDecay(rate: $rate, losses: $losses, decay: $decay); } /** @@ -89,7 +87,9 @@ public function testToString() : void * @param NDArray $gradient * @param list> $expected */ + #[Test] #[DataProvider('stepProvider')] + #[TestDox('Can compute the step')] public function testStep(Parameter $param, NDArray $gradient, array $expected) : void { $step = $this->optimizer->step(param: $param, gradient: $gradient); From 8d3f76a4baf0d28894d66c6a651e73047839c16b Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Tue, 11 Nov 2025 17:13:39 +0200 Subject: [PATCH 16/22] 390 refactoring StochasticTest - added dataprovider for constructor tests --- .../Optimizers/Stochastic/StochasticTest.php | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/tests/NeuralNet/Optimizers/Stochastic/StochasticTest.php b/tests/NeuralNet/Optimizers/Stochastic/StochasticTest.php index 57a50335f..2e16462d1 100644 --- a/tests/NeuralNet/Optimizers/Stochastic/StochasticTest.php +++ b/tests/NeuralNet/Optimizers/Stochastic/StochasticTest.php @@ -23,6 +23,12 @@ class StochasticTest extends TestCase { protected Stochastic $optimizer; + public static function invalidConstructorProvider() : Generator + { + yield 'zero rate' => [0.0]; + yield 'negative rate' => [-0.001]; + } + public static function stepProvider() : Generator { yield [ @@ -50,19 +56,24 @@ protected function setUp() : void } #[Test] - #[TestDox('Throws exception when constructed with invalid learning rate')] - public function testConstructorWithInvalidRate() : void + #[TestDox('Can be cast to a string')] + public function testToString() : void { - $this->expectException(InvalidArgumentException::class); - - new Stochastic(0.0); + self::assertEquals('Stochastic (rate: 0.001)', (string) $this->optimizer); } + /** + * @param float $rate + * @return void + */ #[Test] - #[TestDox('Can be cast to a string')] - public function testToString() : void + #[DataProvider('invalidConstructorProvider')] + #[TestDox('Throws exception when constructed with invalid arguments')] + public function testInvalidConstructorParams(float $rate) : void { - self::assertEquals('Stochastic (rate: 0.001)', (string) $this->optimizer); + $this->expectException(InvalidArgumentException::class); + + new Stochastic($rate); } /** @@ -70,7 +81,9 @@ public function testToString() : void * @param NDArray $gradient * @param list> $expected */ + #[Test] #[DataProvider('stepProvider')] + #[TestDox('Can compute the step')] public function testStep(Parameter $param, NDArray $gradient, array $expected) : void { $step = $this->optimizer->step(param: $param, gradient: $gradient); From 
23397ef90a74606a16fe130b5331c2d38503a662 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Tue, 11 Nov 2025 23:44:16 +0200 Subject: [PATCH 17/22] 390 convert AdaMax to NumPower --- src/NeuralNet/Optimizers/AdaMax/AdaMax.php | 90 ++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 src/NeuralNet/Optimizers/AdaMax/AdaMax.php diff --git a/src/NeuralNet/Optimizers/AdaMax/AdaMax.php b/src/NeuralNet/Optimizers/AdaMax/AdaMax.php new file mode 100644 index 000000000..ae13d2249 --- /dev/null +++ b/src/NeuralNet/Optimizers/AdaMax/AdaMax.php @@ -0,0 +1,90 @@ + + */ +class AdaMax extends Adam +{ + /** + * @param float $rate + * @param float $momentumDecay + * @param float $normDecay + */ + public function __construct(float $rate = 0.001, float $momentumDecay = 0.1, float $normDecay = 0.001) + { + parent::__construct($rate, $momentumDecay, $normDecay); + } + + /** + * Take a step of gradient descent for a given parameter. + * + * AdaMax update (element-wise): + * v_t = v_{t-1} + β1 · (g_t − v_{t-1}) + * u_t = max(β2 · u_{t-1}, |g_t|) + * Δθ_t = η · v_t / max(u_t, ε) + * + * @internal + * + * @param Parameter $param + * @param NDArray $gradient + * @return NDArray + */ + public function step(Parameter $param, NDArray $gradient) : NDArray + { + [$velocity, $norm] = $this->cache[$param->id()]; + + $vHat = NumPower::multiply( + NumPower::subtract($gradient, $velocity), + $this->momentumDecay + ); + + $velocity = NumPower::add($velocity, $vHat); + + // Infinity norm accumulator + $norm = NumPower::multiply($norm, 1.0 - $this->normDecay); + $absGrad = NumPower::abs($gradient); + $norm = NumPower::maximum($norm, $absGrad); + + $this->cache[$param->id()] = [$velocity, $norm]; + + $norm = NumPower::clip($norm, EPSILON, PHP_FLOAT_MAX); + + return NumPower::multiply( + NumPower::divide($velocity, $norm), + $this->rate + ); + } + + /** + * Return the string representation of the object. + * + * @internal + * + * @return string + */ + public function __toString() : string + { + return "AdaMax (rate: {$this->rate}, momentum decay: {$this->momentumDecay}," + . " norm decay: {$this->normDecay})"; + } +} From 223a90e756366e1cade74bd069b31ecc51082edc Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Fri, 14 Nov 2025 16:50:06 +0200 Subject: [PATCH 18/22] 390 convert AdaMax to NumPower --- docs/neural-network/optimizers/adam.md | 2 - docs/neural-network/optimizers/adamax.md | 25 +++- .../Optimizers/AdaMax/AdaMaxTest.php | 111 ++++++++++++++++++ 3 files changed, 133 insertions(+), 5 deletions(-) create mode 100644 tests/NeuralNet/Optimizers/AdaMax/AdaMaxTest.php diff --git a/docs/neural-network/optimizers/adam.md b/docs/neural-network/optimizers/adam.md index 0470a9d4a..b58c70fcb 100644 --- a/docs/neural-network/optimizers/adam.md +++ b/docs/neural-network/optimizers/adam.md @@ -22,8 +22,6 @@ where: - $\mathbf{g}_t$ is the current gradient, and $\mathbf{g}_t^{2}$ denotes element-wise square, - $\varepsilon$ is a small constant for numerical stability (in the implementation, the denominator is clipped from below by `EPSILON`). -Note: This formulation follows the implementation in Rubix ML and does not include bias-correction terms. 
- ## Parameters | # | Name | Default | Type | Description | |---|---|---|---|---| diff --git a/docs/neural-network/optimizers/adamax.md b/docs/neural-network/optimizers/adamax.md index 6b1d9ea05..ff02f925a 100644 --- a/docs/neural-network/optimizers/adamax.md +++ b/docs/neural-network/optimizers/adamax.md @@ -1,8 +1,27 @@ -[source] +[source] # AdaMax A version of the [Adam](adam.md) optimizer that replaces the RMS property with the infinity norm of the past gradients. As such, AdaMax is generally more suitable for sparse parameter updates and noisy gradients. +## Mathematical formulation +Per step (element-wise), AdaMax maintains an exponentially decaying moving average of the gradient (velocity) and an infinity-norm accumulator of past gradients, and uses them to scale the update: + +$$ +\begin{aligned} +\mathbf{v}_t &= (1 - \beta_1)\,\mathbf{v}_{t-1} + \beta_1\,\mathbf{g}_t \\ +\mathbf{u}_t &= \max\big(\beta_2\,\mathbf{u}_{t-1},\ |\mathbf{g}_t|\big) \\ +\Delta{\theta}_t &= \alpha\, \frac{\mathbf{v}_t}{\max(\mathbf{u}_t, \varepsilon)} +\end{aligned} +$$ + +where: +- $t$ is the current step, +- $\alpha$ is the learning rate (`rate`), +- $\beta_1$ is the momentum decay (`momentumDecay`), +- $\beta_2$ is the norm decay (`normDecay`), +- $\mathbf{g}_t$ is the current gradient and $|\mathbf{g}_t|$ denotes element-wise absolute value, +- $\varepsilon$ is a small constant for numerical stability (in the implementation, the denominator is clipped from below by `EPSILON`). + ## Parameters | # | Name | Default | Type | Description | |---|---|---|---|---| @@ -12,10 +31,10 @@ A version of the [Adam](adam.md) optimizer that replaces the RMS property with t ## Example ```php -use Rubix\ML\NeuralNet\Optimizers\AdaMax; +use Rubix\ML\NeuralNet\Optimizers\AdaMax\AdaMax; $optimizer = new AdaMax(0.0001, 0.1, 0.001); ``` ## References -[^1]: D. P. Kingma et al. (2014). Adam: A Method for Stochastic Optimization. \ No newline at end of file +[^1]: D. P. Kingma et al. (2014). Adam: A Method for Stochastic Optimization. 
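For readers following the conversion, here is a minimal usage sketch of the NumPower-backed AdaMax shown above. Only the `AdaMax`, `Parameter`, `warm()`, and `step()` APIs come from the diffs in this series; the weight and gradient matrices and the `print_r()` call are illustrative stand-ins, not part of the patch.

```php
use Rubix\ML\NeuralNet\Optimizers\AdaMax\AdaMax;
use Rubix\ML\NeuralNet\Parameters\Parameter;

// Illustrative weights and gradient (made-up values, mirroring the shapes used in the tests).
$param = new Parameter(NumPower::array([
    [0.1, 0.6, -0.4],
    [0.5, 0.6, -0.4],
]));

$gradient = NumPower::array([
    [0.01, 0.05, -0.02],
    [-0.01, 0.02, 0.03],
]);

$optimizer = new AdaMax(rate: 0.001, momentumDecay: 0.1, normDecay: 0.001);

// AdaMax is adaptive: warm() zero-initializes the velocity and infinity-norm caches for this parameter.
$optimizer->warm($param);

// step() returns the update Δθ as an NDArray; the caches are advanced internally.
$step = $optimizer->step(param: $param, gradient: $gradient);

print_r($step->toArray());
```

With freshly warmed (zeroed) caches, the first step reduces to `rate · momentumDecay · sign(g)`, which is why the expected values in `AdaMaxTest` below are ±0.0001 for a rate of 0.001 and a momentum decay of 0.1.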
diff --git a/tests/NeuralNet/Optimizers/AdaMax/AdaMaxTest.php b/tests/NeuralNet/Optimizers/AdaMax/AdaMaxTest.php new file mode 100644 index 000000000..d32bf0b3d --- /dev/null +++ b/tests/NeuralNet/Optimizers/AdaMax/AdaMaxTest.php @@ -0,0 +1,111 @@ + [0.0, 0.1, 0.001]; + yield 'negative rate' => [-0.001, 0.1, 0.001]; + yield 'zero momentum decay' => [0.001, 0.0, 0.001]; + yield 'momentum decay == 1' => [0.001, 1.0, 0.001]; + yield 'momentum decay > 1' => [0.001, 1.5, 0.001]; + yield 'negative momentum decay' => [0.001, -0.1, 0.001]; + yield 'zero norm decay' => [0.001, 0.1, 0.0]; + yield 'norm decay == 1' => [0.001, 0.1, 1.0]; + yield 'norm decay > 1' => [0.001, 0.1, 1.5]; + yield 'negative norm decay' => [0.001, 0.1, -0.1]; + } + + public static function stepProvider() : Generator + { + yield [ + new Parameter(NumPower::array([ + [0.1, 0.6, -0.4], + [0.5, 0.6, -0.4], + [0.1, 0.1, -0.7], + ])), + NumPower::array([ + [0.01, 0.05, -0.02], + [-0.01, 0.02, 0.03], + [0.04, -0.01, -0.5], + ]), + [ + [0.0001, 0.0001, -0.0001], + [-0.0001, 0.0001, 0.0001], + [0.0001, -0.0001, -0.0001], + ], + ]; + } + + protected function setUp() : void + { + $this->optimizer = new AdaMax( + rate: 0.001, + momentumDecay: 0.1, + normDecay: 0.001 + ); + } + + #[Test] + #[TestDox('Can be cast to a string')] + public function testToString() : void + { + self::assertEquals('AdaMax (rate: 0.001, momentum decay: 0.1, norm decay: 0.001)', (string) $this->optimizer); + } + + /** + * @param float $rate + * @param float $momentumDecay + * @param float $normDecay + * @return void + */ + #[Test] + #[DataProvider('invalidConstructorProvider')] + #[TestDox('Throws exception when constructed with invalid arguments')] + public function testInvalidConstructorParams(float $rate, float $momentumDecay, float $normDecay) : void + { + $this->expectException(InvalidArgumentException::class); + + new AdaMax(rate: $rate, momentumDecay: $momentumDecay, normDecay: $normDecay); + } + + /** + * @param Parameter $param + * @param NDArray $gradient + * @param list> $expected + */ + #[Test] + #[DataProvider('stepProvider')] + #[TestDox('Can compute the step')] + public function testStep(Parameter $param, NDArray $gradient, array $expected) : void + { + $this->optimizer->warm($param); + + $step = $this->optimizer->step(param: $param, gradient: $gradient); + + self::assertEqualsWithDelta($expected, $step->toArray(), 1e-7); + } +} + + From db1c6dbe675b5b18a2562118cdf2ed1830e003f2 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Fri, 14 Nov 2025 16:52:50 +0200 Subject: [PATCH 19/22] 390 Added warm initialization test for zeroed Adam optimizer caches --- tests/NeuralNet/Optimizers/Adam/AdamTest.php | 28 ++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/NeuralNet/Optimizers/Adam/AdamTest.php b/tests/NeuralNet/Optimizers/Adam/AdamTest.php index 04444001d..b0549ae70 100644 --- a/tests/NeuralNet/Optimizers/Adam/AdamTest.php +++ b/tests/NeuralNet/Optimizers/Adam/AdamTest.php @@ -81,6 +81,33 @@ public function testToString() : void self::assertSame($expected, (string) $this->optimizer); } + #[Test] + #[TestDox('Warm initializes zeroed velocity and norm caches with the parameter\'s shape')] + public function testWarmInitializesZeroedCache() : void + { + $param = new Parameter(NumPower::array([ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ])); + + // Warm the optimizer for this parameter + $this->optimizer->warm($param); + + // Inspect protected cache via reflection + $ref = new \ReflectionClass($this->optimizer); + $prop = 
$ref->getProperty('cache'); + $prop->setAccessible(true); + $cache = $prop->getValue($this->optimizer); + + self::assertArrayHasKey($param->id(), $cache); + + [$velocity, $norm] = $cache[$param->id()]; + + $zeros = NumPower::zeros($param->param()->shape()); + self::assertEqualsWithDelta($zeros->toArray(), $velocity->toArray(), 0.0); + self::assertEqualsWithDelta($zeros->toArray(), $norm->toArray(), 0.0); + } + /** * @param float $rate * @param float $momentumDecay @@ -113,3 +140,4 @@ public function testStep(Parameter $param, NDArray $gradient, array $expected) : self::assertEqualsWithDelta($expected, $step->toArray(), 1e-7); } } + From 548c055bf0339bf60916682a9cdb5447669a5d2d Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Fri, 14 Nov 2025 16:54:57 +0200 Subject: [PATCH 20/22] Code cleanup: removed redundant docblocks, adjusted formatting, and applied consistent style across optimizer tests and Parameter class. --- src/NeuralNet/Parameters/Parameter.php | 13 ++++++------- tests/NeuralNet/Optimizers/AdaMax/AdaMaxTest.php | 3 --- tests/NeuralNet/Optimizers/Adam/AdamTest.php | 2 -- .../NeuralNet/Optimizers/Cyclical/CyclicalTest.php | 3 +-- .../NeuralNet/Optimizers/Momentum/MomentumTest.php | 1 - .../Optimizers/StepDecay/StepDecayTest.php | 3 --- .../Optimizers/Stochastic/StochasticTest.php | 1 - 7 files changed, 7 insertions(+), 19 deletions(-) diff --git a/src/NeuralNet/Parameters/Parameter.php b/src/NeuralNet/Parameters/Parameter.php index efa7cf88a..0cef2e87a 100644 --- a/src/NeuralNet/Parameters/Parameter.php +++ b/src/NeuralNet/Parameters/Parameter.php @@ -22,7 +22,6 @@ /** * Parameter - * */ class Parameter { @@ -61,7 +60,7 @@ public function __construct(NDArray $param) * * @return int */ - public function id(): int + public function id() : int { return $this->id; } @@ -71,7 +70,7 @@ public function id(): int * * @return NDArray */ - public function param(): NDArray + public function param() : NDArray { return $this->param; } @@ -79,10 +78,10 @@ public function param(): NDArray /** * Update the parameter with the gradient and optimizer. * - * @param NDArray $gradient - * @param Optimizer $optimizer + * @param NDArray $gradient + * @param Optimizer $optimizer */ - public function update(NDArray $gradient, Optimizer $optimizer): void + public function update(NDArray $gradient, Optimizer $optimizer) : void { $step = $optimizer->step($this, $gradient); @@ -92,7 +91,7 @@ public function update(NDArray $gradient, Optimizer $optimizer): void /** * Perform a deep copy of the object upon cloning. 
*/ - public function __clone(): void + public function __clone() : void { $this->param = clone $this->param; } diff --git a/tests/NeuralNet/Optimizers/AdaMax/AdaMaxTest.php b/tests/NeuralNet/Optimizers/AdaMax/AdaMaxTest.php index d32bf0b3d..0ca059561 100644 --- a/tests/NeuralNet/Optimizers/AdaMax/AdaMaxTest.php +++ b/tests/NeuralNet/Optimizers/AdaMax/AdaMaxTest.php @@ -78,7 +78,6 @@ public function testToString() : void * @param float $rate * @param float $momentumDecay * @param float $normDecay - * @return void */ #[Test] #[DataProvider('invalidConstructorProvider')] @@ -107,5 +106,3 @@ public function testStep(Parameter $param, NDArray $gradient, array $expected) : self::assertEqualsWithDelta($expected, $step->toArray(), 1e-7); } } - - diff --git a/tests/NeuralNet/Optimizers/Adam/AdamTest.php b/tests/NeuralNet/Optimizers/Adam/AdamTest.php index b0549ae70..bcf19d344 100644 --- a/tests/NeuralNet/Optimizers/Adam/AdamTest.php +++ b/tests/NeuralNet/Optimizers/Adam/AdamTest.php @@ -112,7 +112,6 @@ public function testWarmInitializesZeroedCache() : void * @param float $rate * @param float $momentumDecay * @param float $normDecay - * @return void */ #[Test] #[DataProvider('invalidConstructorProvider')] @@ -140,4 +139,3 @@ public function testStep(Parameter $param, NDArray $gradient, array $expected) : self::assertEqualsWithDelta($expected, $step->toArray(), 1e-7); } } - diff --git a/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php b/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php index aa7102f0f..302b770be 100644 --- a/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php +++ b/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php @@ -53,7 +53,7 @@ public static function stepProvider() : Generator [0.00001, 0.00005, -0.00002], [-0.00001, 0.00002, 0.00003], [0.00004, -0.00001, -0.0005], - ] + ], ]; } @@ -74,7 +74,6 @@ public function testToString() : void * @param float $upper * @param int $losses * @param float|null $decay - * @return void */ #[Test] #[DataProvider('invalidConstructorProvider')] diff --git a/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php b/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php index 1b486efa5..03b65f9a7 100644 --- a/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php +++ b/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php @@ -69,7 +69,6 @@ public function testToString() : void /** * @param float $rate * @param float $decay - * @return void */ #[Test] #[DataProvider('invalidConstructorProvider')] diff --git a/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php b/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php index 7d581e31b..ae7f78810 100644 --- a/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php +++ b/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php @@ -13,7 +13,6 @@ use PHPUnit\Framework\Attributes\Test; use PHPUnit\Framework\Attributes\TestDox; use Rubix\ML\Exceptions\InvalidArgumentException; -use Rubix\ML\NeuralNet\Optimizers\Stochastic\Stochastic; use Rubix\ML\NeuralNet\Parameters\Parameter; use Rubix\ML\NeuralNet\Optimizers\StepDecay\StepDecay; use PHPUnit\Framework\TestCase; @@ -70,7 +69,6 @@ public function testToString() : void * @param float $rate * @param int $losses * @param float $decay - * @return void */ #[Test] #[DataProvider('invalidConstructorProvider')] @@ -97,4 +95,3 @@ public function testStep(Parameter $param, NDArray $gradient, array $expected) : self::assertEqualsWithDelta($expected, $step->toArray(), 1e-7); } } - diff --git a/tests/NeuralNet/Optimizers/Stochastic/StochasticTest.php 
b/tests/NeuralNet/Optimizers/Stochastic/StochasticTest.php index 2e16462d1..c24b990f7 100644 --- a/tests/NeuralNet/Optimizers/Stochastic/StochasticTest.php +++ b/tests/NeuralNet/Optimizers/Stochastic/StochasticTest.php @@ -64,7 +64,6 @@ public function testToString() : void /** * @param float $rate - * @return void */ #[Test] #[DataProvider('invalidConstructorProvider')] From 40cf94b3c774ba3b6d7bf898bff1483465ad0165 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Fri, 14 Nov 2025 17:21:32 +0200 Subject: [PATCH 21/22] 390 convert AdaGrad to NumPower --- docs/neural-network/optimizers/adagrad.md | 22 ++- src/NeuralNet/Optimizers/AdaGrad/AdaGrad.php | 134 ++++++++++++++++++ .../Optimizers/AdaGrad/AdaGradTest.php | 94 ++++++++++++ 3 files changed, 247 insertions(+), 3 deletions(-) create mode 100644 src/NeuralNet/Optimizers/AdaGrad/AdaGrad.php create mode 100644 tests/NeuralNet/Optimizers/AdaGrad/AdaGradTest.php diff --git a/docs/neural-network/optimizers/adagrad.md b/docs/neural-network/optimizers/adagrad.md index 9cfddff25..2e55a9953 100644 --- a/docs/neural-network/optimizers/adagrad.md +++ b/docs/neural-network/optimizers/adagrad.md @@ -1,8 +1,24 @@ -[source] +[source] # AdaGrad Short for *Adaptive Gradient*, the AdaGrad Optimizer speeds up the learning of parameters that do not change often and slows down the learning of parameters that do enjoy heavy activity. Due to AdaGrad's infinitely decaying step size, training may be slow or fail to converge using a low learning rate. +## Mathematical formulation +Per step (element-wise), AdaGrad accumulates the sum of squared gradients and scales the update by the root of this sum: + +$$ +\begin{aligned} +\mathbf{n}_t &= \mathbf{n}_{t-1} + \mathbf{g}_t^{2} \\ +\Delta{\theta}_t &= \alpha\, \frac{\mathbf{g}_t}{\sqrt{\mathbf{n}_t} + \varepsilon} +\end{aligned} +$$ + +where: +- $t$ is the current step, +- $\alpha$ is the learning rate (`rate`), +- $\mathbf{g}_t$ is the current gradient, and $\mathbf{g}_t^{2}$ denotes element-wise square, +- $\varepsilon$ is a small constant for numerical stability (in the implementation, the denominator is clipped from below by `EPSILON`). + ## Parameters | # | Name | Default | Type | Description | |---|---|---|---|---| @@ -10,10 +26,10 @@ Short for *Adaptive Gradient*, the AdaGrad Optimizer speeds up the learning of p ## Example ```php -use Rubix\ML\NeuralNet\Optimizers\AdaGrad; +use Rubix\ML\NeuralNet\Optimizers\AdaGrad\AdaGrad; $optimizer = new AdaGrad(0.125); ``` ## References -[^1]: J. Duchi et al. (2011). Adaptive Subgradient Methods for Online Learning and Stochastic Optimization. \ No newline at end of file +[^1]: J. Duchi et al. (2011). Adaptive Subgradient Methods for Online Learning and Stochastic Optimization. diff --git a/src/NeuralNet/Optimizers/AdaGrad/AdaGrad.php b/src/NeuralNet/Optimizers/AdaGrad/AdaGrad.php new file mode 100644 index 000000000..b6c92bd56 --- /dev/null +++ b/src/NeuralNet/Optimizers/AdaGrad/AdaGrad.php @@ -0,0 +1,134 @@ + + */ +class AdaGrad implements Optimizer, Adaptive +{ + /** + * The learning rate that controls the global step size. + * + * @var float + */ + protected float $rate; + + /** + * The cache of sum of squared gradients. 
+ * + * @var NDArray[] + */ + protected array $cache = [ + // + ]; + + /** + * @param float $rate + * @throws InvalidArgumentException + */ + public function __construct(float $rate = 0.01) + { + if ($rate <= 0.0) { + throw new InvalidArgumentException("Learning rate must be greater than 0, $rate given."); + } + + $this->rate = $rate; + } + + /** + * Warm the parameter cache. + * + * @internal + * + * @param Parameter $param + * @throws RuntimeException + */ + public function warm(Parameter $param) : void + { + $class = get_class($param->param()); + + if (!$class) { + throw new RuntimeException('Could not locate parameter class.'); + } + + $this->cache[$param->id()] = NumPower::zeros($param->param()->shape()); + } + + /** + * Take a step of gradient descent for a given parameter. + * + * AdaGrad update (element-wise): + * n_t = n_{t-1} + g_t^2 + * Δθ_t = η · g_t / max(√n_t, ε) + * + * where: + * - g_t is the current gradient, + * - n_t is the accumulated (running) sum of squared gradients, + * - η is the learning rate (rate), + * - ε is a small constant to avoid division by zero (implemented via clipping √n_t to [ε, +∞)). + * + * @internal + * + * @param Parameter $param + * @param NDArray $gradient + * @return NDArray + */ + public function step(Parameter $param, NDArray $gradient) : NDArray + { + $norm = $this->cache[$param->id()]; + + // Update accumulated squared gradients: norm = norm + gradient^2 + $norm = NumPower::add($norm, NumPower::square($gradient)); + + $this->cache[$param->id()] = $norm; + + // denominator = max(sqrt(norm), EPSILON) + $denominator = NumPower::sqrt($norm); + $denominator = NumPower::clip($denominator, EPSILON, PHP_FLOAT_MAX); + + // return rate * gradient / denominator + return NumPower::divide( + NumPower::multiply($gradient, $this->rate), + $denominator + ); + } + + /** + * Return the string representation of the object. 
+ * + * @internal + * + * @return string + */ + public function __toString() : string + { + return "AdaGrad (rate: {$this->rate})"; + } +} diff --git a/tests/NeuralNet/Optimizers/AdaGrad/AdaGradTest.php b/tests/NeuralNet/Optimizers/AdaGrad/AdaGradTest.php new file mode 100644 index 000000000..44ff773f5 --- /dev/null +++ b/tests/NeuralNet/Optimizers/AdaGrad/AdaGradTest.php @@ -0,0 +1,94 @@ + [0.0]; + yield 'negative rate' => [-0.001]; + } + + public static function stepProvider() : Generator + { + yield [ + new Parameter(NumPower::array([ + [0.1, 0.6, -0.4], + [0.5, 0.6, -0.4], + [0.1, 0.1, -0.7], + ])), + NumPower::array([ + [0.01, 0.05, -0.02], + [-0.01, 0.02, 0.03], + [0.04, -0.01, -0.5], + ]), + [ + [0.001, 0.001, -0.001], + [-0.001, 0.001, 0.001], + [0.001, -0.001, -0.001], + ], + ]; + } + + protected function setUp() : void + { + $this->optimizer = new AdaGrad(0.001); + } + + #[Test] + #[TestDox('Can be cast to a string')] + public function testToString() : void + { + self::assertSame('AdaGrad (rate: 0.01)', (string) (new AdaGrad())); + } + + /** + * @param float $rate + */ + #[Test] + #[DataProvider('invalidConstructorProvider')] + #[TestDox('Throws exception when constructed with invalid arguments')] + public function testInvalidConstructorParams(float $rate) : void + { + $this->expectException(InvalidArgumentException::class); + + new AdaGrad(rate: $rate); + } + + /** + * @param Parameter $param + * @param NDArray $gradient + * @param list> $expected + */ + #[Test] + #[DataProvider('stepProvider')] + #[TestDox('Can compute the step')] + public function testStep(Parameter $param, NDArray $gradient, array $expected) : void + { + $this->optimizer->warm($param); + + $step = $this->optimizer->step(param: $param, gradient: $gradient); + + self::assertEqualsWithDelta($expected, $step->toArray(), 1e-7); + } +} From a67655fa021955db28047eb567a435ecdbde7b29 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Fri, 14 Nov 2025 17:40:39 +0200 Subject: [PATCH 22/22] 390- Fix broken link to the Adam optimizer source file in documentation --- docs/neural-network/optimizers/adam.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/neural-network/optimizers/adam.md b/docs/neural-network/optimizers/adam.md index b58c70fcb..d10a469f3 100644 --- a/docs/neural-network/optimizers/adam.md +++ b/docs/neural-network/optimizers/adam.md @@ -1,4 +1,4 @@ -[source] +[source] # Adam Short for *Adaptive Moment Estimation*, the Adam Optimizer combines both Momentum and RMS properties. In addition to storing an exponentially decaying average of past squared gradients like [RMSprop](rms-prop.md), Adam also keeps an exponentially decaying average of past gradients, similar to [Momentum](momentum.md). Whereas Momentum can be seen as a ball running down a slope, Adam behaves like a heavy ball with friction.
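As a closing illustration of how the converted pieces fit together, the following is a hedged sketch of one Adam update flowing through the NumPower-backed `Parameter`. It assumes `Adam` accepts the same named constructor arguments exercised in `AdamTest`; the weight and gradient values are arbitrary.

```php
use Rubix\ML\NeuralNet\Optimizers\Adam\Adam;
use Rubix\ML\NeuralNet\Parameters\Parameter;

// Made-up weight matrix wrapped in a Parameter.
$param = new Parameter(NumPower::array([
    [0.2, -0.1],
    [0.4, 0.3],
]));

$optimizer = new Adam(rate: 0.001, momentumDecay: 0.1, normDecay: 0.001);

// Zero-initialize the per-parameter velocity and norm caches before the first step.
$optimizer->warm($param);

// Stand-in gradient; in a real network this comes from backpropagation.
$gradient = NumPower::array([
    [0.05, -0.02],
    [0.01, 0.03],
]);

// Parameter::update() asks the optimizer for a step and updates the wrapped weights.
$param->update($gradient, $optimizer);
```

Because Adam keeps per-parameter velocity and norm caches (see `testWarmInitializesZeroedCache`), `warm()` has to run before the first update so the caches exist for the parameter's id.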