From f99cc6349d93b71efc8527c2b82cbcd5e6fc5bec Mon Sep 17 00:00:00 2001
From: Malyala Karthik
Date: Tue, 4 Nov 2025 13:30:34 +0530
Subject: [PATCH 01/13] Add AdaptiveAveragePooling2D and AdaptiveMaxPooling2D
 layers

---
 keras/src/backend/jax/__init__.py             |   2 +
 keras/src/backend/jax/nn.py                   | 152 +++++++++++++
 keras/src/layers/__init__.py                  |   4 +
 keras/src/layers/pooling/__init__.py          |   4 +
 .../pooling/adaptive_average_pooling2d.py     | 112 +++++++++++
 .../layers/pooling/adaptive_max_pooling2d.py  | 112 +++++++++++
 .../layers/pooling/adaptive_pooling2d_test.py | 177 ++++++++++++++++++
 keras/src/ops/nn.py                           | 107 +++++++++++
 8 files changed, 670 insertions(+)
 create mode 100644 keras/src/layers/pooling/adaptive_average_pooling2d.py
 create mode 100644 keras/src/layers/pooling/adaptive_max_pooling2d.py
 create mode 100644 keras/src/layers/pooling/adaptive_pooling2d_test.py

diff --git a/keras/src/backend/jax/__init__.py b/keras/src/backend/jax/__init__.py
index 89ac0fa71c8c..afae28a7614f 100644
--- a/keras/src/backend/jax/__init__.py
+++ b/keras/src/backend/jax/__init__.py
@@ -25,6 +25,8 @@ from keras.src.backend.jax.core import shape
 from keras.src.backend.jax.core import stop_gradient
 from keras.src.backend.jax.core import vectorized_map
+from keras.src.backend.jax.nn import adaptive_avg_pool
+from keras.src.backend.jax.nn import adaptive_max_pool
 from keras.src.backend.jax.rnn import cudnn_ok
 from keras.src.backend.jax.rnn import gru
 from keras.src.backend.jax.rnn import lstm
diff --git a/keras/src/backend/jax/nn.py b/keras/src/backend/jax/nn.py
index 15cc90f73747..084ce8d81792 100644
--- a/keras/src/backend/jax/nn.py
+++ b/keras/src/backend/jax/nn.py
@@ -1464,3 +1464,155 @@ def _pair(x):
     # ---- reshape -> (N, C*kH*kW, L) ----
     _, CKK, oH, oW = patches.shape
     return patches.reshape(N, CKK, oH * oW)
+
+
+def _adaptive_pool_start_index(output_idx, output_size, input_size):
+    """Calculate start index for adaptive pooling (PyTorch compatible)."""
+    return jnp.floor((output_idx * input_size) / output_size).astype(jnp.int32)
+
+
+def _adaptive_pool_end_index(output_idx, output_size, input_size):
+    """Calculate end index for adaptive pooling (PyTorch compatible)."""
+    return jnp.ceil(((output_idx + 1) * input_size) / output_size).astype(
+        jnp.int32
+    )
+
+
+def adaptive_avg_pool(
+    inputs, output_size, data_format="channels_last", name=None
+):
+    """
+    Adaptive average pooling for JAX backend (PyTorch-compatible).
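+
+    The pooling window for output index `i` along an input dimension of
+    size `L` with `out` outputs spans `floor(i * L / out)` to
+    `ceil((i + 1) * L / out)`, the same indices computed by the helper
+    functions above.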
+ """ + # Convert output_size to tuple + spatial_dims = inputs.ndim - 2 + if isinstance(output_size, int): + output_size = (output_size,) * spatial_dims + else: + output_size = tuple(output_size) + + # Get spatial shape + if data_format == "channels_last": + batch_size = inputs.shape[0] + channels = inputs.shape[-1] + spatial_shape = inputs.shape[1:-1] + else: # channels_first + batch_size = inputs.shape[0] + channels = inputs.shape[1] + spatial_shape = inputs.shape[2:] + + if len(output_size) != 2: + raise NotImplementedError( + "Only 2D adaptive pooling is currently supported" + ) + + out_h, out_w = output_size + in_h, in_w = spatial_shape + + # Build output by iterating over output positions + result_list = [] + + for i in range(out_h): + for j in range(out_w): + # Calculate pooling region for this output position + start_h = jnp.floor((i * in_h) / out_h).astype(jnp.int32) + end_h = jnp.ceil(((i + 1) * in_h) / out_h).astype(jnp.int32) + start_w = jnp.floor((j * in_w) / out_w).astype(jnp.int32) + end_w = jnp.ceil(((j + 1) * in_w) / out_w).astype(jnp.int32) + + # Extract region and apply average pooling + if data_format == "channels_last": + region = inputs[:, start_h:end_h, start_w:end_w, :] + # Average over spatial dimensions (axis 1, 2) + pooled = jnp.mean(region, axis=(1, 2)) + else: # channels_first + region = inputs[:, :, start_h:end_h, start_w:end_w] + # Average over spatial dimensions (axis 2, 3) + pooled = jnp.mean(region, axis=(2, 3)) + + result_list.append(pooled) + + # Stack results: (out_h*out_w, batch, channels) + output = jnp.stack(result_list, axis=0) + + # Reshape and transpose to correct output shape + if data_format == "channels_last": + # (out_h*out_w, batch, channels) -> (batch, out_h, out_w, channels) + output = output.reshape(out_h, out_w, batch_size, channels) + output = jnp.transpose(output, (2, 0, 1, 3)) + else: # channels_first + # (out_h*out_w, batch, channels) -> (batch, channels, out_h, out_w) + output = output.reshape(out_h, out_w, batch_size, channels) + output = jnp.transpose(output, (2, 3, 0, 1)) + + return output + + +def adaptive_max_pool( + inputs, output_size, data_format="channels_last", name=None +): + """ + Adaptive max pooling for JAX backend (PyTorch-compatible). 
+ """ + # Convert output_size to tuple + spatial_dims = inputs.ndim - 2 + if isinstance(output_size, int): + output_size = (output_size,) * spatial_dims + else: + output_size = tuple(output_size) + + # Get spatial shape + if data_format == "channels_last": + batch_size = inputs.shape[0] + channels = inputs.shape[-1] + spatial_shape = inputs.shape[1:-1] + else: # channels_first + batch_size = inputs.shape[0] + channels = inputs.shape[1] + spatial_shape = inputs.shape[2:] + + if len(output_size) != 2: + raise NotImplementedError( + "Only 2D adaptive pooling is currently supported" + ) + + out_h, out_w = output_size + in_h, in_w = spatial_shape + + # Build output by iterating over output positions + result_list = [] + + for i in range(out_h): + for j in range(out_w): + # Calculate pooling region for this output position + start_h = jnp.floor((i * in_h) / out_h).astype(jnp.int32) + end_h = jnp.ceil(((i + 1) * in_h) / out_h).astype(jnp.int32) + start_w = jnp.floor((j * in_w) / out_w).astype(jnp.int32) + end_w = jnp.ceil(((j + 1) * in_w) / out_w).astype(jnp.int32) + + # Extract region and apply max pooling + if data_format == "channels_last": + region = inputs[:, start_h:end_h, start_w:end_w, :] + # Max over spatial dimensions (axis 1, 2) + pooled = jnp.max(region, axis=(1, 2)) + else: # channels_first + region = inputs[:, :, start_h:end_h, start_w:end_w] + # Max over spatial dimensions (axis 2, 3) + pooled = jnp.max(region, axis=(2, 3)) + + result_list.append(pooled) + + # Stack results: (out_h*out_w, batch, channels) + output = jnp.stack(result_list, axis=0) + + # Reshape and transpose to correct output shape + if data_format == "channels_last": + # (out_h*out_w, batch, channels) -> (batch, out_h, out_w, channels) + output = output.reshape(out_h, out_w, batch_size, channels) + output = jnp.transpose(output, (2, 0, 1, 3)) + else: # channels_first + # (out_h*out_w, batch, channels) -> (batch, channels, out_h, out_w) + output = output.reshape(out_h, out_w, batch_size, channels) + output = jnp.transpose(output, (2, 3, 0, 1)) + + return output diff --git a/keras/src/layers/__init__.py b/keras/src/layers/__init__.py index febdcef15a98..cf5a0595ca10 100644 --- a/keras/src/layers/__init__.py +++ b/keras/src/layers/__init__.py @@ -63,6 +63,10 @@ SpectralNormalization, ) from keras.src.layers.normalization.unit_normalization import UnitNormalization +from keras.src.layers.pooling.adaptive_average_pooling2d import ( + AdaptiveAveragePooling2D, +) +from keras.src.layers.pooling.adaptive_max_pooling2d import AdaptiveMaxPooling2D from keras.src.layers.pooling.average_pooling1d import AveragePooling1D from keras.src.layers.pooling.average_pooling2d import AveragePooling2D from keras.src.layers.pooling.average_pooling3d import AveragePooling3D diff --git a/keras/src/layers/pooling/__init__.py b/keras/src/layers/pooling/__init__.py index e69de29bb2d1..edea894680d8 100644 --- a/keras/src/layers/pooling/__init__.py +++ b/keras/src/layers/pooling/__init__.py @@ -0,0 +1,4 @@ +from keras.src.layers.pooling.adaptive_average_pooling2d import ( + AdaptiveAveragePooling2D, +) +from keras.src.layers.pooling.adaptive_max_pooling2d import AdaptiveMaxPooling2D diff --git a/keras/src/layers/pooling/adaptive_average_pooling2d.py b/keras/src/layers/pooling/adaptive_average_pooling2d.py new file mode 100644 index 000000000000..a2714b33fe5b --- /dev/null +++ b/keras/src/layers/pooling/adaptive_average_pooling2d.py @@ -0,0 +1,112 @@ +"""Adaptive Average Pooling 2D layer.""" + +from keras import config +from keras.src import ops 
+from keras.src.api_export import keras_export
+from keras.src.layers.layer import Layer
+
+
+@keras_export("keras.layers.AdaptiveAveragePooling2D")
+class AdaptiveAveragePooling2D(Layer):
+    """Adaptive average pooling operation for 2D spatial data.
+
+    This layer applies an adaptive average pooling operation, which pools the
+    input such that the output has a target shape specified by `output_size`,
+    regardless of the input shape. The kernel size and stride are automatically
+    computed to achieve the target output size.
+
+    Args:
+        output_size: Integer or tuple of 2 integers, specifying the target
+            output size `(height, width)`. If a single integer is provided,
+            the same value is used for both dimensions.
+        data_format: string, either `"channels_last"` or `"channels_first"`.
+            The ordering of the dimensions in the inputs. `"channels_last"`
+            corresponds to inputs with shape `(batch, height, width, channels)`
+            while `"channels_first"` corresponds to inputs with shape
+            `(batch, channels, height, width)`. Defaults to the value found in
+            your Keras config file at `~/.keras/keras.json`. If never set, then
+            `"channels_last"` will be used.
+
+    Input shape:
+    - If `data_format="channels_last"`:
+        4D tensor with shape `(batch_size, height, width, channels)`.
+    - If `data_format="channels_first"`:
+        4D tensor with shape `(batch_size, channels, height, width)`.
+
+    Output shape:
+    - If `data_format="channels_last"`:
+        4D tensor with shape
+        `(batch_size, output_height, output_width, channels)`.
+    - If `data_format="channels_first"`:
+        4D tensor with shape
+        `(batch_size, channels, output_height, output_width)`.
+
+    Examples:
+
+    >>> input_img = np.random.rand(1, 64, 64, 3)
+    >>> layer = keras.layers.AdaptiveAveragePooling2D(output_size=(32, 32))
+    >>> output_img = layer(input_img)
+    >>> output_img.shape
+    (1, 32, 32, 3)
+
+    >>> # Single integer for square output
+    >>> layer = keras.layers.AdaptiveAveragePooling2D(output_size=7)
+    >>> output_img = layer(input_img)
+    >>> output_img.shape
+    (1, 7, 7, 3)
+    """
+
+    def __init__(self, output_size, data_format=None, **kwargs):
+        super().__init__(**kwargs)
+        if isinstance(output_size, int):
+            self.output_size = (output_size, output_size)
+        elif isinstance(output_size, (list, tuple)):
+            if len(output_size) != 2:
+                raise ValueError(
+                    f"`output_size` must be an integer or tuple of 2 integers. "
+                    f"Received: output_size={output_size}"
+                )
+            self.output_size = tuple(output_size)
+        else:
+            raise TypeError(
+                f"`output_size` must be an integer or tuple of 2 integers. "
+                f"Received: output_size={output_size} of type "
+                f"{type(output_size)}"
+            )
+
+        self.data_format = data_format or config.image_data_format()
+
+        if self.data_format not in {"channels_first", "channels_last"}:
+            raise ValueError(
+                f"Invalid data_format: {self.data_format}. "
+                "Must be either 'channels_first' or 'channels_last'."
+            )
+
+    def call(self, inputs):
+        return ops.adaptive_avg_pool(
+            inputs, output_size=self.output_size, data_format=self.data_format
+        )
+
+    def compute_output_shape(self, input_shape):
+        if self.data_format == "channels_last":
+            return (
+                input_shape[0],
+                self.output_size[0],
+                self.output_size[1],
+                input_shape[3],
+            )
+        else:  # channels_first
+            return (
+                input_shape[0],
+                input_shape[1],
+                self.output_size[0],
+                self.output_size[1],
+            )
+
+    def get_config(self):
+        config_dict = {
+            "output_size": self.output_size,
+            "data_format": self.data_format,
+        }
+        base_config = super().get_config()
+        return {**base_config, **config_dict}
diff --git a/keras/src/layers/pooling/adaptive_max_pooling2d.py b/keras/src/layers/pooling/adaptive_max_pooling2d.py
new file mode 100644
index 000000000000..50f498650d18
--- /dev/null
+++ b/keras/src/layers/pooling/adaptive_max_pooling2d.py
@@ -0,0 +1,112 @@
+"""Adaptive Max Pooling 2D layer."""
+
+from keras import config
+from keras.src import ops
+from keras.src.api_export import keras_export
+from keras.src.layers.layer import Layer
+
+
+@keras_export("keras.layers.AdaptiveMaxPooling2D")
+class AdaptiveMaxPooling2D(Layer):
+    """Adaptive max pooling operation for 2D spatial data.
+
+    This layer applies an adaptive max pooling operation, which pools the
+    input such that the output has a target shape specified by `output_size`,
+    regardless of the input shape. The kernel size and stride are automatically
+    computed to achieve the target output size.
+
+    Args:
+        output_size: Integer or tuple of 2 integers, specifying the target
+            output size `(height, width)`. If a single integer is provided,
+            the same value is used for both dimensions.
+        data_format: string, either `"channels_last"` or `"channels_first"`.
+            The ordering of the dimensions in the inputs. `"channels_last"`
+            corresponds to inputs with shape `(batch, height, width, channels)`
+            while `"channels_first"` corresponds to inputs with shape
+            `(batch, channels, height, width)`. Defaults to the value found in
+            your Keras config file at `~/.keras/keras.json`. If never set, then
+            `"channels_last"` will be used.
+
+    Input shape:
+    - If `data_format="channels_last"`:
+        4D tensor with shape `(batch_size, height, width, channels)`.
+    - If `data_format="channels_first"`:
+        4D tensor with shape `(batch_size, channels, height, width)`.
+
+    Output shape:
+    - If `data_format="channels_last"`:
+        4D tensor with shape
+        `(batch_size, output_height, output_width, channels)`.
+    - If `data_format="channels_first"`:
+        4D tensor with shape
+        `(batch_size, channels, output_height, output_width)`.
+
+    Examples:
+
+    >>> input_img = np.random.rand(1, 64, 64, 3)
+    >>> layer = keras.layers.AdaptiveMaxPooling2D(output_size=(32, 32))
+    >>> output_img = layer(input_img)
+    >>> output_img.shape
+    (1, 32, 32, 3)
+
+    >>> # Single integer for square output
+    >>> layer = keras.layers.AdaptiveMaxPooling2D(output_size=7)
+    >>> output_img = layer(input_img)
+    >>> output_img.shape
+    (1, 7, 7, 3)
+    """
+
+    def __init__(self, output_size, data_format=None, **kwargs):
+        super().__init__(**kwargs)
+        if isinstance(output_size, int):
+            self.output_size = (output_size, output_size)
+        elif isinstance(output_size, (list, tuple)):
+            if len(output_size) != 2:
+                raise ValueError(
+                    f"`output_size` must be an integer or tuple of 2 integers. "
+                    f"Received: output_size={output_size}"
+                )
+            self.output_size = tuple(output_size)
+        else:
+            raise TypeError(
+                f"`output_size` must be an integer or tuple of 2 integers. "
+                f"Received: output_size={output_size} of type "
+                f"{type(output_size)}"
+            )
+
+        self.data_format = data_format or config.image_data_format()
+
+        if self.data_format not in {"channels_first", "channels_last"}:
+            raise ValueError(
+                f"Invalid data_format: {self.data_format}. "
+                "Must be either 'channels_first' or 'channels_last'."
+            )
+
+    def call(self, inputs):
+        return ops.adaptive_max_pool(
+            inputs, output_size=self.output_size, data_format=self.data_format
+        )
+
+    def compute_output_shape(self, input_shape):
+        if self.data_format == "channels_last":
+            return (
+                input_shape[0],
+                self.output_size[0],
+                self.output_size[1],
+                input_shape[3],
+            )
+        else:  # channels_first
+            return (
+                input_shape[0],
+                input_shape[1],
+                self.output_size[0],
+                self.output_size[1],
+            )
+
+    def get_config(self):
+        config_dict = {
+            "output_size": self.output_size,
+            "data_format": self.data_format,
+        }
+        base_config = super().get_config()
+        return {**base_config, **config_dict}
diff --git a/keras/src/layers/pooling/adaptive_pooling2d_test.py b/keras/src/layers/pooling/adaptive_pooling2d_test.py
new file mode 100644
index 000000000000..f85ce0ec568f
--- /dev/null
+++ b/keras/src/layers/pooling/adaptive_pooling2d_test.py
@@ -0,0 +1,177 @@
+"""Tests for Adaptive Average Pooling 2D layer."""
+
+import numpy as np
+import pytest
+
+from keras.src import layers
+from keras.src import ops
+from keras.src import testing
+
+# Only import torch if available
+try:
+    import torch
+
+    TORCH_AVAILABLE = True
+except ImportError:
+    TORCH_AVAILABLE = False
+
+
+class AdaptiveAveragePooling2DTest(testing.TestCase):
+    """Test suite for AdaptiveAveragePooling2D layer."""
+
+    def test_adaptive_avg_pooling_2d_basic(self):
+        """Test basic functionality with square output."""
+        layer = layers.AdaptiveAveragePooling2D(output_size=4)
+        x = np.random.randn(2, 8, 8, 3).astype("float32")
+        y = layer(x)
+        self.assertEqual(y.shape, (2, 4, 4, 3))
+
+    def test_adaptive_avg_pooling_2d_rectangular(self):
+        """Test with rectangular output size."""
+        layer = layers.AdaptiveAveragePooling2D(output_size=(2, 4))
+        x = np.random.randn(2, 8, 8, 3).astype("float32")
+        y = layer(x)
+        self.assertEqual(y.shape, (2, 2, 4, 3))
+
+    def test_adaptive_avg_pooling_2d_channels_first(self):
+        """Test channels_first data format."""
+        layer = layers.AdaptiveAveragePooling2D(
+            output_size=4, data_format="channels_first"
+        )
+        x = np.random.randn(2, 3, 8, 8).astype("float32")
+        y = layer(x)
+        self.assertEqual(y.shape, (2, 3, 4, 4))
+
+    def test_adaptive_avg_pooling_2d_output_shape(self):
+        """Test compute_output_shape method."""
+        layer = layers.AdaptiveAveragePooling2D(output_size=(2, 4))
+        x_shape = (2, 8, 8, 3)
+        output_shape = layer.compute_output_shape(x_shape)
+        self.assertEqual(output_shape, (2, 2, 4, 3))
+
+    def test_adaptive_avg_pooling_2d_invalid_output_size(self):
+        """Test error handling for invalid output_size."""
+        with self.assertRaisesRegex(ValueError, "`output_size` must be"):
+            layers.AdaptiveAveragePooling2D(output_size=(2, 3, 4))
+
+    def test_adaptive_avg_pooling_2d_invalid_data_format(self):
+        """Test error handling for invalid data_format."""
+        with self.assertRaisesRegex(ValueError, "Invalid data_format"):
+            layer = layers.AdaptiveAveragePooling2D(
+                output_size=4, data_format="invalid"
+            )
+            x = np.random.randn(2, 8, 8, 3).astype("float32")
+            layer(x)
+
+    def test_adaptive_avg_pooling_2d_get_config(self):
+        """Test layer serialization."""
+        layer = layers.AdaptiveAveragePooling2D(
+            output_size=(3, 5), data_format="channels_first"
+        )
+        config = layer.get_config()
+        self.assertEqual(config["output_size"], (3, 5))
+        self.assertEqual(config["data_format"], "channels_first")
+
+        # Test reconstruction from config
+        new_layer = layers.AdaptiveAveragePooling2D.from_config(config)
+        self.assertEqual(new_layer.output_size, (3, 5))
+        self.assertEqual(new_layer.data_format, "channels_first")
+
+
+class AdaptiveMaxPooling2DTest(testing.TestCase):
+    """Test suite for AdaptiveMaxPooling2D layer."""
+
+    def test_adaptive_max_pooling_2d_basic(self):
+        """Test basic functionality with square output."""
+        layer = layers.AdaptiveMaxPooling2D(output_size=4)
+        x = np.random.randn(2, 8, 8, 3).astype("float32")
+        y = layer(x)
+        self.assertEqual(y.shape, (2, 4, 4, 3))
+
+    def test_adaptive_max_pooling_2d_rectangular(self):
+        """Test with rectangular output size."""
+        layer = layers.AdaptiveMaxPooling2D(output_size=(3, 5))
+        x = np.random.randn(2, 9, 15, 3).astype("float32")
+        y = layer(x)
+        self.assertEqual(y.shape, (2, 3, 5, 3))
+
+    def test_adaptive_max_pooling_2d_channels_first(self):
+        """Test channels_first data format."""
+        layer = layers.AdaptiveMaxPooling2D(
+            output_size=4, data_format="channels_first"
+        )
+        x = np.random.randn(2, 3, 8, 8).astype("float32")
+        y = layer(x)
+        self.assertEqual(y.shape, (2, 3, 4, 4))
+
+    def test_adaptive_max_pooling_2d_output_shape(self):
+        """Test compute_output_shape method."""
+        layer = layers.AdaptiveMaxPooling2D(output_size=(3, 5))
+        x_shape = (2, 9, 15, 3)
+        output_shape = layer.compute_output_shape(x_shape)
+        self.assertEqual(output_shape, (2, 3, 5, 3))
+
+    def test_adaptive_max_pooling_2d_get_config(self):
+        """Test layer serialization."""
+        layer = layers.AdaptiveMaxPooling2D(
+            output_size=(3, 5), data_format="channels_first"
+        )
+        config = layer.get_config()
+        self.assertEqual(config["output_size"], (3, 5))
+        self.assertEqual(config["data_format"], "channels_first")
+
+        # Test reconstruction from config
+        new_layer = layers.AdaptiveMaxPooling2D.from_config(config)
+        self.assertEqual(new_layer.output_size, (3, 5))
+        self.assertEqual(new_layer.data_format, "channels_first")
+
+
+# Parameterized tests as standalone functions (OUTSIDE classes)
+@pytest.mark.skipif(not TORCH_AVAILABLE, reason="PyTorch not installed")
+@pytest.mark.parametrize(
+    "output_size", [(4, 4), (2, 2), (3, 5), (1, 1), (7, 9)]
+)
+def test_adaptive_avg_pooling2d_matches_torch(output_size):
+    """Test numerical accuracy against PyTorch implementation."""
+    x_np = np.random.randn(2, 3, 8, 8).astype(np.float32)
+
+    # PyTorch
+    x_torch = torch.tensor(x_np)
+    y_torch = torch.nn.functional.adaptive_avg_pool2d(x_torch, output_size)
+
+    # Keras/JAX
+    x_keras = ops.convert_to_tensor(x_np)
+    y_keras = ops.adaptive_avg_pool(
+        x_keras, output_size=output_size, data_format="channels_first"
+    )
+
+    y_keras_np = np.asarray(y_keras)
+
+    np.testing.assert_allclose(
+        y_keras_np, y_torch.numpy(), rtol=1e-5, atol=1e-5
+    )
+
+
+@pytest.mark.skipif(not TORCH_AVAILABLE, reason="PyTorch not installed")
+@pytest.mark.parametrize(
+    "output_size", [(4, 4), (2, 2), (3, 5), (1, 1), (7, 9)]
+)
+def test_adaptive_max_pooling2d_matches_torch(output_size):
+    """Test numerical accuracy against PyTorch implementation."""
+    x_np = np.random.randn(2, 3, 8, 8).astype(np.float32)
+
+    # PyTorch
+    x_torch = torch.tensor(x_np)
+    y_torch = torch.nn.functional.adaptive_max_pool2d(x_torch, output_size)
+
+    # Keras/JAX
+    x_keras = ops.convert_to_tensor(x_np)
+    y_keras = ops.adaptive_max_pool(
+        x_keras, output_size=output_size, data_format="channels_first"
+    )
+
+    y_keras_np = np.asarray(y_keras)
+
+    np.testing.assert_allclose(
+        y_keras_np, y_torch.numpy(), rtol=1e-5, atol=1e-5
+    )
diff --git a/keras/src/ops/nn.py b/keras/src/ops/nn.py
index 23792400ae4e..a398ce7d8c69 100644
--- a/keras/src/ops/nn.py
+++ b/keras/src/ops/nn.py
@@ -2,6 +2,7 @@
 
 import warnings
 
+from keras import config
 from keras.src import backend
 from keras.src.api_export import keras_export
 from keras.src.backend import KerasTensor
@@ -1162,6 +1163,58 @@ def max_pool(
     return backend.nn.max_pool(inputs, pool_size, strides, padding, data_format)
 
 
+@keras_export("keras.ops.adaptive_max_pool")
+def adaptive_max_pool(
+    inputs,
+    output_size,
+    data_format=None,
+):
+    """Adaptive max pooling operation.
+
+    Applies an adaptive max pooling operation that automatically computes
+    the kernel size and stride to pool the input to the specified
+    `output_size`. This operation is useful when you want a fixed output
+    size regardless of input size, commonly used in models like ResNet for
+    global feature extraction.
+
+    Args:
+        inputs: Tensor of rank 4. Input tensor of shape:
+            - If `data_format="channels_last"`:
+                `(batch_size, height, width, channels)`.
+            - If `data_format="channels_first"`:
+                `(batch_size, channels, height, width)`.
+        output_size: Integer or tuple/list of 2 integers, specifying the
+            target output spatial dimensions `(output_height, output_width)`.
+            If a single integer is provided, the same value is used for both
+            dimensions.
+        data_format: string, either `"channels_last"` or `"channels_first"`.
+            Defaults to the value found in your Keras config file at
+            `~/.keras/keras.json`. If never set, defaults to
+            `"channels_last"`.
+
+    Returns:
+        A tensor of rank 4 representing the adaptive max pooled result.
+
+    Example:
+
+    >>> x = np.random.rand(2, 64, 64, 3)
+    >>> y = keras.ops.adaptive_max_pool(x, output_size=(32, 32))
+    >>> y.shape
+    (2, 32, 32, 3)
+
+    >>> # Works with any input size
+    >>> x = np.random.rand(2, 100, 80, 3)
+    >>> y = keras.ops.adaptive_max_pool(x, output_size=7)
+    >>> y.shape
+    (2, 7, 7, 3)
+    """
+    if data_format is None:
+        data_format = config.image_data_format()
+    return backend.nn.adaptive_max_pool(
+        inputs,
+        output_size=output_size,
+        data_format=data_format,
+    )
+
+
 class AveragePool(Operation):
     def __init__(
         self,
@@ -1257,6 +1310,60 @@ def average_pool(
     )
 
 
+@keras_export("keras.ops.adaptive_avg_pool")
+def adaptive_avg_pool(
+    inputs,
+    output_size,
+    data_format=None,
+):
+    """Adaptive average pooling operation.
+
+    Applies an adaptive average pooling operation that automatically
+    computes the kernel size and stride to pool the input to the specified
+    `output_size`. This operation is useful when you want a fixed output
+    size regardless of input size, commonly used in models like ResNet for
+    global feature extraction.
+
+    Args:
+        inputs: Tensor of rank 4. Input tensor of shape:
+            - If `data_format="channels_last"`:
+                `(batch_size, height, width, channels)`.
+            - If `data_format="channels_first"`:
+                `(batch_size, channels, height, width)`.
+        output_size: Integer or tuple/list of 2 integers, specifying the
+            target output spatial dimensions `(output_height, output_width)`.
+            If a single integer is provided, the same value is used for both
+            dimensions.
+        data_format: string, either `"channels_last"` or `"channels_first"`.
+            Defaults to the value found in your Keras config file at
+            `~/.keras/keras.json`. If never set, defaults to
+            `"channels_last"`.
+
+    Returns:
+        A tensor of rank 4 representing the adaptive average pooled result.
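+
+    Note: with `output_size=1`, the result matches global average pooling
+    (`keras.layers.GlobalAveragePooling2D` with `keepdims=True`).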
+
+    Example:
+
+    >>> x = np.random.rand(2, 64, 64, 3)
+    >>> y = keras.ops.adaptive_avg_pool(x, output_size=(32, 32))
+    >>> y.shape
+    (2, 32, 32, 3)
+
+    >>> # Works with any input size
+    >>> x = np.random.rand(2, 100, 80, 3)
+    >>> y = keras.ops.adaptive_avg_pool(x, output_size=7)
+    >>> y.shape
+    (2, 7, 7, 3)
+    """
+    if data_format is None:
+        data_format = config.image_data_format()
+    return backend.nn.adaptive_avg_pool(
+        inputs,
+        output_size=output_size,
+        data_format=data_format,
+    )
+
+
 class Conv(Operation):
     def __init__(
         self,

From f830e93c39bcb37055991f1407ab1479217b3e13 Mon Sep 17 00:00:00 2001
From: Malyala Karthik
Date: Wed, 5 Nov 2025 01:26:30 +0530
Subject: [PATCH 02/13] Add adaptive pooling (adaptive_avg_pool and
 adaptive_max_pool) for JAX, NumPy, PyTorch, and TensorFlow backends

---
 keras/src/backend/jax/nn.py        | 182 +++++++-----------------
 keras/src/backend/numpy/nn.py      |  59 ++++++++++
 keras/src/backend/openvino/nn.py   |  16 +++
 keras/src/backend/tensorflow/nn.py |  84 +++++++++++++
 keras/src/backend/torch/nn.py      |  88 ++++++++++++++
 5 files changed, 291 insertions(+), 138 deletions(-)

diff --git a/keras/src/backend/jax/nn.py b/keras/src/backend/jax/nn.py
index 084ce8d81792..308c0e90d336 100644
--- a/keras/src/backend/jax/nn.py
+++ b/keras/src/backend/jax/nn.py
@@ -1466,153 +1466,59 @@ def _pair(x):
     return patches.reshape(N, CKK, oH * oW)
 
 
-def _adaptive_pool_start_index(output_idx, output_size, input_size):
-    """Calculate start index for adaptive pooling (PyTorch compatible)."""
-    return jnp.floor((output_idx * input_size) / output_size).astype(jnp.int32)
-
-
-def _adaptive_pool_end_index(output_idx, output_size, input_size):
-    """Calculate end index for adaptive pooling (PyTorch compatible)."""
-    return jnp.ceil(((output_idx + 1) * input_size) / output_size).astype(
-        jnp.int32
-    )
-
-
-def adaptive_avg_pool(
-    inputs, output_size, data_format="channels_last", name=None
+def _adaptive_pool(
+    inputs, output_size, data_format="channels_first", pool_fn=jnp.mean
 ):
     """
-    Adaptive average pooling for JAX backend (PyTorch-compatible).
+    Optimized adaptive pooling for JAX backend, fully vectorized and
+    tracer-safe.
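+
+    `pool_fn` selects the reduction applied over each window:
+    `jnp.mean` for average pooling, `jnp.max` for max pooling.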
""" - # Convert output_size to tuple - spatial_dims = inputs.ndim - 2 if isinstance(output_size, int): - output_size = (output_size,) * spatial_dims - else: - output_size = tuple(output_size) + output_size = (output_size, output_size) + out_h, out_w = output_size - # Get spatial shape + # Handle data format if data_format == "channels_last": - batch_size = inputs.shape[0] - channels = inputs.shape[-1] - spatial_shape = inputs.shape[1:-1] - else: # channels_first - batch_size = inputs.shape[0] - channels = inputs.shape[1] - spatial_shape = inputs.shape[2:] - - if len(output_size) != 2: - raise NotImplementedError( - "Only 2D adaptive pooling is currently supported" - ) - - out_h, out_w = output_size - in_h, in_w = spatial_shape - - # Build output by iterating over output positions - result_list = [] - - for i in range(out_h): - for j in range(out_w): - # Calculate pooling region for this output position - start_h = jnp.floor((i * in_h) / out_h).astype(jnp.int32) - end_h = jnp.ceil(((i + 1) * in_h) / out_h).astype(jnp.int32) - start_w = jnp.floor((j * in_w) / out_w).astype(jnp.int32) - end_w = jnp.ceil(((j + 1) * in_w) / out_w).astype(jnp.int32) - - # Extract region and apply average pooling - if data_format == "channels_last": - region = inputs[:, start_h:end_h, start_w:end_w, :] - # Average over spatial dimensions (axis 1, 2) - pooled = jnp.mean(region, axis=(1, 2)) - else: # channels_first - region = inputs[:, :, start_h:end_h, start_w:end_w] - # Average over spatial dimensions (axis 2, 3) - pooled = jnp.mean(region, axis=(2, 3)) - - result_list.append(pooled) - - # Stack results: (out_h*out_w, batch, channels) - output = jnp.stack(result_list, axis=0) - - # Reshape and transpose to correct output shape + inputs = jnp.transpose(inputs, (0, 3, 1, 2)) # NHWC → NCHW + n, c, h, w = inputs.shape + + # Precompute static pooling bins as concrete numpy arrays (not traced) + h_bins = [ + (int(jnp.floor(i * h / out_h)), int(jnp.ceil((i + 1) * h / out_h))) + for i in range(out_h) + ] + w_bins = [ + (int(jnp.floor(j * w / out_w)), int(jnp.ceil((j + 1) * w / out_w))) + for j in range(out_w) + ] + + # Define pooling over one image (C,H,W) + def pool_single_image(img): + pooled_rows = [] + for hs, he in h_bins: + pooled_cols = [] + for ws, we in w_bins: + region = img[:, hs:he, ws:we] + pooled_cols.append(pool_fn(region, axis=(1, 2))) + pooled_rows.append(jnp.stack(pooled_cols, axis=-1)) + return jnp.stack(pooled_rows, axis=-2) # (C, out_h, out_w) + + # Vectorize over batch + outputs = jax.vmap(pool_single_image)(inputs) # (N, C, out_h, out_w) + + # Convert back if channels_last if data_format == "channels_last": - # (out_h*out_w, batch, channels) -> (batch, out_h, out_w, channels) - output = output.reshape(out_h, out_w, batch_size, channels) - output = jnp.transpose(output, (2, 0, 1, 3)) - else: # channels_first - # (out_h*out_w, batch, channels) -> (batch, channels, out_h, out_w) - output = output.reshape(out_h, out_w, batch_size, channels) - output = jnp.transpose(output, (2, 3, 0, 1)) - - return output + outputs = jnp.transpose(outputs, (0, 2, 3, 1)) + return outputs -def adaptive_max_pool( - inputs, output_size, data_format="channels_last", name=None +def adaptive_avg_pool( + inputs, output_size, data_format="channels_first", name=None ): - """ - Adaptive max pooling for JAX backend (PyTorch-compatible). 
- """ - # Convert output_size to tuple - spatial_dims = inputs.ndim - 2 - if isinstance(output_size, int): - output_size = (output_size,) * spatial_dims - else: - output_size = tuple(output_size) - - # Get spatial shape - if data_format == "channels_last": - batch_size = inputs.shape[0] - channels = inputs.shape[-1] - spatial_shape = inputs.shape[1:-1] - else: # channels_first - batch_size = inputs.shape[0] - channels = inputs.shape[1] - spatial_shape = inputs.shape[2:] + return _adaptive_pool(inputs, output_size, data_format, pool_fn=jnp.mean) - if len(output_size) != 2: - raise NotImplementedError( - "Only 2D adaptive pooling is currently supported" - ) - out_h, out_w = output_size - in_h, in_w = spatial_shape - - # Build output by iterating over output positions - result_list = [] - - for i in range(out_h): - for j in range(out_w): - # Calculate pooling region for this output position - start_h = jnp.floor((i * in_h) / out_h).astype(jnp.int32) - end_h = jnp.ceil(((i + 1) * in_h) / out_h).astype(jnp.int32) - start_w = jnp.floor((j * in_w) / out_w).astype(jnp.int32) - end_w = jnp.ceil(((j + 1) * in_w) / out_w).astype(jnp.int32) - - # Extract region and apply max pooling - if data_format == "channels_last": - region = inputs[:, start_h:end_h, start_w:end_w, :] - # Max over spatial dimensions (axis 1, 2) - pooled = jnp.max(region, axis=(1, 2)) - else: # channels_first - region = inputs[:, :, start_h:end_h, start_w:end_w] - # Max over spatial dimensions (axis 2, 3) - pooled = jnp.max(region, axis=(2, 3)) - - result_list.append(pooled) - - # Stack results: (out_h*out_w, batch, channels) - output = jnp.stack(result_list, axis=0) - - # Reshape and transpose to correct output shape - if data_format == "channels_last": - # (out_h*out_w, batch, channels) -> (batch, out_h, out_w, channels) - output = output.reshape(out_h, out_w, batch_size, channels) - output = jnp.transpose(output, (2, 0, 1, 3)) - else: # channels_first - # (out_h*out_w, batch, channels) -> (batch, channels, out_h, out_w) - output = output.reshape(out_h, out_w, batch_size, channels) - output = jnp.transpose(output, (2, 3, 0, 1)) - - return output +def adaptive_max_pool( + inputs, output_size, data_format="channels_first", name=None +): + return _adaptive_pool(inputs, output_size, data_format, pool_fn=jnp.max) diff --git a/keras/src/backend/numpy/nn.py b/keras/src/backend/numpy/nn.py index 44f3fb882e12..ed2ac094fef3 100644 --- a/keras/src/backend/numpy/nn.py +++ b/keras/src/backend/numpy/nn.py @@ -1237,3 +1237,62 @@ def _pair(x): # ---- reshape -> (N, C*kH*kW, L) ---- return patches.reshape(N, C * k[0] * k[1], -1) + + +def _adaptive_pool2d(inputs, output_size, mode="avg", data_format=None): + """Adaptive pooling for 2D inputs.""" + from keras.src import backend + + data_format = backend.standardize_data_format(data_format) + x = convert_to_tensor(inputs) + + if isinstance(output_size, int): + out_h = out_w = int(output_size) + else: + out_h, out_w = output_size + + if data_format == "channels_last": + N, H, W, C = x.shape + x_nchw = np.transpose(x, (0, 3, 1, 2)) + else: + N, C, H, W = x.shape + x_nchw = x + + out = np.empty((N, C, out_h, out_w), dtype=x.dtype) + + for i in range(out_h): + h_start = int(np.floor(i * H / out_h)) + h_end = int(np.ceil((i + 1) * H / out_h)) + h_start = max(0, min(h_start, H - 1)) + h_end = max(h_start + 1, min(h_end, H)) + + for j in range(out_w): + w_start = int(np.floor(j * W / out_w)) + w_end = int(np.ceil((j + 1) * W / out_w)) + w_start = max(0, min(w_start, W - 1)) + w_end = max(w_start + 1, 
min(w_end, W)) + + patch = x_nchw[:, :, h_start:h_end, w_start:w_end] + + if mode == "avg": + out[:, :, i, j] = np.mean(patch, axis=(2, 3)) + else: + out[:, :, i, j] = np.max(patch, axis=(2, 3)) + + if data_format == "channels_last": + return np.transpose(out, (0, 2, 3, 1)) + return out + + +def adaptive_avg_pool(inputs, output_size, data_format=None): + """Adaptive average pooling 2D wrapper.""" + return _adaptive_pool2d( + inputs, output_size, mode="avg", data_format=data_format + ) + + +def adaptive_max_pool(inputs, output_size, data_format=None): + """Adaptive max pooling 2D wrapper.""" + return _adaptive_pool2d( + inputs, output_size, mode="max", data_format=data_format + ) diff --git a/keras/src/backend/openvino/nn.py b/keras/src/backend/openvino/nn.py index 2c025825ed82..2d6daedd18c0 100644 --- a/keras/src/backend/openvino/nn.py +++ b/keras/src/backend/openvino/nn.py @@ -133,6 +133,14 @@ def max_pool( ) +def adaptive_max_pool(inputs, output_size, data_format=None): + """Adaptive max pooling - OpenVINO backend not yet supported.""" + raise NotImplementedError( + "adaptive_max_pool is not yet supported for OpenVINO backend. " + "Please use JAX, NumPy, PyTorch, or TensorFlow backend." + ) + + def average_pool( inputs, pool_size, @@ -145,6 +153,14 @@ def average_pool( ) +def adaptive_avg_pool(inputs, output_size, data_format=None): + """Adaptive average pooling - OpenVINO backend not yet supported.""" + raise NotImplementedError( + "adaptive_avg_pool is not yet supported for OpenVINO backend. " + "Please use JAX, NumPy, PyTorch, or TensorFlow backend." + ) + + def _adjust_strides_dilation( x, num_spatial_dims, diff --git a/keras/src/backend/tensorflow/nn.py b/keras/src/backend/tensorflow/nn.py index 8a89e6a6b590..a435cf847264 100644 --- a/keras/src/backend/tensorflow/nn.py +++ b/keras/src/backend/tensorflow/nn.py @@ -240,6 +240,48 @@ def max_pool( return outputs +def adaptive_max_pool(inputs, output_size, data_format=None): + """Adaptive max pooling 2D for TensorFlow backend.""" + import tensorflow as tf + + from keras.src import backend + + data_format = backend.standardize_data_format(data_format) + x = tf.convert_to_tensor(inputs) + + if isinstance(output_size, int): + out_h = out_w = int(output_size) + else: + out_h, out_w = output_size + + if data_format == "channels_last": + N, H, W, C = x.shape + x_nchw = tf.transpose(x, [0, 3, 1, 2]) + else: + N, C, H, W = x.shape + x_nchw = x + + result_list = [] + for i in range(out_h): + for j in range(out_w): + h_start = int(tf.math.floor(i * H / out_h)) + h_end = int(tf.math.ceil((i + 1) * H / out_h)) + w_start = int(tf.math.floor(j * W / out_w)) + w_end = int(tf.math.ceil((j + 1) * W / out_w)) + + patch = x_nchw[:, :, h_start:h_end, w_start:w_end] + pooled = tf.reduce_max(patch, axis=[2, 3]) + result_list.append(pooled) + + output = tf.stack(result_list, axis=0) + output = tf.reshape(output, [out_h, out_w, N, C]) + output = tf.transpose( + output, [2, 0, 1, 3] if data_format == "channels_last" else [2, 3, 0, 1] + ) + + return output + + def average_pool( inputs, pool_size, @@ -268,6 +310,48 @@ def average_pool( return outputs +def adaptive_avg_pool(inputs, output_size, data_format=None): + """Adaptive average pooling 2D for TensorFlow backend.""" + import tensorflow as tf + + from keras.src import backend + + data_format = backend.standardize_data_format(data_format) + x = tf.convert_to_tensor(inputs) + + if isinstance(output_size, int): + out_h = out_w = int(output_size) + else: + out_h, out_w = output_size + + if data_format == 
"channels_last": + N, H, W, C = x.shape + x_nchw = tf.transpose(x, [0, 3, 1, 2]) + else: + N, C, H, W = x.shape + x_nchw = x + + result_list = [] + for i in range(out_h): + for j in range(out_w): + h_start = int(tf.math.floor(i * H / out_h)) + h_end = int(tf.math.ceil((i + 1) * H / out_h)) + w_start = int(tf.math.floor(j * W / out_w)) + w_end = int(tf.math.ceil((j + 1) * W / out_w)) + + patch = x_nchw[:, :, h_start:h_end, w_start:w_end] + pooled = tf.reduce_mean(patch, axis=[2, 3]) + result_list.append(pooled) + + output = tf.stack(result_list, axis=0) + output = tf.reshape(output, [out_h, out_w, N, C]) + output = tf.transpose( + output, [2, 0, 1, 3] if data_format == "channels_last" else [2, 3, 0, 1] + ) + + return output + + def _convert_data_format(data_format, ndim): if data_format == "channels_last": if ndim == 3: diff --git a/keras/src/backend/torch/nn.py b/keras/src/backend/torch/nn.py index 85b2a32d5560..3e9fc05a755d 100644 --- a/keras/src/backend/torch/nn.py +++ b/keras/src/backend/torch/nn.py @@ -384,6 +384,51 @@ def max_pool( return outputs +def adaptive_max_pool(inputs, output_size, data_format=None): + """Adaptive max pooling (1D/2D/3D) with channels_last support.""" + inputs = convert_to_tensor(inputs) + num_spatial_dims = inputs.ndim - 2 + + data_format = backend.standardize_data_format(data_format) + orig_format = data_format + if data_format == "channels_last": + inputs = _transpose_spatial_inputs(inputs) + + if isinstance(output_size, int): + torch_output_size = ( + output_size + if num_spatial_dims == 1 + else (output_size,) * num_spatial_dims + ) + else: + torch_output_size = standardize_tuple( + output_size, num_spatial_dims, "output_size" + ) + + if get_device() == "meta": + inputs = torch.empty( + size=inputs.shape, dtype=inputs.dtype, device="cpu" + ) + + if num_spatial_dims == 1: + res = tnn.adaptive_max_pool1d(inputs, output_size=torch_output_size) + elif num_spatial_dims == 2: + res = tnn.adaptive_max_pool2d(inputs, output_size=torch_output_size) + elif num_spatial_dims == 3: + res = tnn.adaptive_max_pool3d(inputs, output_size=torch_output_size) + else: + raise ValueError( + "Inputs to adaptive max pooling must have ndim=3, 4 or 5, " + f"Received input shape: {inputs.shape}." 
+        )
+
+    outputs = res[0] if isinstance(res, tuple) else res
+
+    if orig_format == "channels_last":
+        outputs = _transpose_spatial_outputs(outputs)
+    return outputs
+
+
 def average_pool(
     inputs,
     pool_size,
@@ -458,6 +503,49 @@ def average_pool(
     return outputs
 
 
+def adaptive_avg_pool(inputs, output_size, data_format=None):
+    """Adaptive average pooling (1D/2D/3D) with channels_last support."""
+    inputs = convert_to_tensor(inputs)
+    num_spatial_dims = inputs.ndim - 2
+
+    data_format = backend.standardize_data_format(data_format)
+    orig_format = data_format
+    if data_format == "channels_last":
+        inputs = _transpose_spatial_inputs(inputs)
+
+    if isinstance(output_size, int):
+        torch_output_size = (
+            output_size
+            if num_spatial_dims == 1
+            else (output_size,) * num_spatial_dims
+        )
+    else:
+        torch_output_size = standardize_tuple(
+            output_size, num_spatial_dims, "output_size"
+        )
+
+    if get_device() == "meta":
+        inputs = torch.empty(
+            size=inputs.shape, dtype=inputs.dtype, device="cpu"
+        )
+
+    if num_spatial_dims == 1:
+        outputs = tnn.adaptive_avg_pool1d(inputs, output_size=torch_output_size)
+    elif num_spatial_dims == 2:
+        outputs = tnn.adaptive_avg_pool2d(inputs, output_size=torch_output_size)
+    elif num_spatial_dims == 3:
+        outputs = tnn.adaptive_avg_pool3d(inputs, output_size=torch_output_size)
+    else:
+        raise ValueError(
+            "Inputs to adaptive average pooling must have ndim=3, 4 or 5. "
+            f"Received input shape: {inputs.shape}."
+        )
+
+    if orig_format == "channels_last":
+        outputs = _transpose_spatial_outputs(outputs)
+    return outputs
+
+
 def conv(
     inputs,
     kernel,

From 9938ef18b073ebe90441164e87b81e424c445e89 Mon Sep 17 00:00:00 2001
From: Malyala Karthik
Date: Fri, 7 Nov 2025 11:58:30 +0530
Subject: [PATCH 03/13] Fix adaptive pooling implementation

---
 keras/src/backend/jax/nn.py                   | 132 ++++++++++------
 keras/src/backend/numpy/nn.py                 |  31 ++--
 keras/src/backend/openvino/nn.py              |   8 +-
 keras/src/backend/tensorflow/nn.py            |  82 +---------
 .../layers/pooling/adaptive_pooling2d_test.py |  56 ++++++++
 .../pooling/benchmark_adaptive_pooling.py     |  95 +++++++++++++
 .../pooling/test_training_adaptive_pooling.py |  95 +++++++++++++
 7 files changed, 358 insertions(+), 141 deletions(-)
 create mode 100644 keras/src/layers/pooling/benchmark_adaptive_pooling.py
 create mode 100644 keras/src/layers/pooling/test_training_adaptive_pooling.py

diff --git a/keras/src/backend/jax/nn.py b/keras/src/backend/jax/nn.py
index 308c0e90d336..e73e53ec100c 100644
--- a/keras/src/backend/jax/nn.py
+++ b/keras/src/backend/jax/nn.py
@@ -1466,59 +1466,99 @@ def _pair(x):
     return patches.reshape(N, CKK, oH * oW)
 
 
-def _adaptive_pool(
-    inputs, output_size, data_format="channels_first", pool_fn=jnp.mean
+def adaptive_avg_pool(
+    inputs, output_size, data_format="channels_first", name=None
 ):
-    """
-    Optimized adaptive pooling for JAX backend, fully vectorized and
-    tracer-safe.
- """ if isinstance(output_size, int): output_size = (output_size, output_size) out_h, out_w = output_size + if data_format == "channels_first": + inputs = jnp.transpose(inputs, (0, 2, 3, 1)) # NCHW -> NHWC + n, h, w, c = inputs.shape + if h % out_h == 0 and w % out_w == 0: + kernel_h = h // out_h + kernel_w = w // out_w + stride_h = kernel_h + stride_w = kernel_w + pooled = lax.reduce_window( + inputs, + 0.0, + lax.add, + (1, kernel_h, kernel_w, 1), + (1, stride_h, stride_w, 1), + "VALID", + ) + pooled = pooled / (kernel_h * kernel_w) + else: + start_h = jnp.arange(out_h, dtype=jnp.int32) * h // out_h + end_h = jnp.minimum( + ((jnp.arange(out_h, dtype=jnp.int32) + 1) * h + out_h - 1) // out_h, + h, + ) + start_w = jnp.arange(out_w, dtype=jnp.int32) * w // out_w + end_w = jnp.minimum( + ((jnp.arange(out_w, dtype=jnp.int32) + 1) * w + out_w - 1) // out_w, + w, + ) + pooled = jnp.zeros((n, out_h, out_w, c), dtype=inputs.dtype) + for i in range(out_h): + sh = start_h[i] + eh = end_h[i] + for j in range(out_w): + sw = start_w[j] + ew = end_w[j] + region = inputs[:, sh:eh, sw:ew, :] + pooled = pooled.at[:, i, j, :].set( + jnp.mean(region, axis=(1, 2)) + ) - # Handle data format - if data_format == "channels_last": - inputs = jnp.transpose(inputs, (0, 3, 1, 2)) # NHWC → NCHW - n, c, h, w = inputs.shape - - # Precompute static pooling bins as concrete numpy arrays (not traced) - h_bins = [ - (int(jnp.floor(i * h / out_h)), int(jnp.ceil((i + 1) * h / out_h))) - for i in range(out_h) - ] - w_bins = [ - (int(jnp.floor(j * w / out_w)), int(jnp.ceil((j + 1) * w / out_w))) - for j in range(out_w) - ] - - # Define pooling over one image (C,H,W) - def pool_single_image(img): - pooled_rows = [] - for hs, he in h_bins: - pooled_cols = [] - for ws, we in w_bins: - region = img[:, hs:he, ws:we] - pooled_cols.append(pool_fn(region, axis=(1, 2))) - pooled_rows.append(jnp.stack(pooled_cols, axis=-1)) - return jnp.stack(pooled_rows, axis=-2) # (C, out_h, out_w) - - # Vectorize over batch - outputs = jax.vmap(pool_single_image)(inputs) # (N, C, out_h, out_w) - - # Convert back if channels_last - if data_format == "channels_last": - outputs = jnp.transpose(outputs, (0, 2, 3, 1)) - return outputs - - -def adaptive_avg_pool( - inputs, output_size, data_format="channels_first", name=None -): - return _adaptive_pool(inputs, output_size, data_format, pool_fn=jnp.mean) + if data_format == "channels_first": + pooled = jnp.transpose(pooled, (0, 3, 1, 2)) # NHWC -> NCHW + return pooled def adaptive_max_pool( inputs, output_size, data_format="channels_first", name=None ): - return _adaptive_pool(inputs, output_size, data_format, pool_fn=jnp.max) + if isinstance(output_size, int): + output_size = (output_size, output_size) + out_h, out_w = output_size + if data_format == "channels_first": + inputs = jnp.transpose(inputs, (0, 2, 3, 1)) # NCHW -> NHWC + n, h, w, c = inputs.shape + if h % out_h == 0 and w % out_w == 0: + kernel_h = h // out_h + kernel_w = w // out_w + stride_h = kernel_h + stride_w = kernel_w + pooled = lax.reduce_window( + inputs, + -jnp.inf, + lax.max, + (1, kernel_h, kernel_w, 1), + (1, stride_h, stride_w, 1), + "VALID", + ) + else: + start_h = jnp.arange(out_h, dtype=jnp.int32) * h // out_h + end_h = jnp.minimum( + ((jnp.arange(out_h, dtype=jnp.int32) + 1) * h + out_h - 1) // out_h, + h, + ) + start_w = jnp.arange(out_w, dtype=jnp.int32) * w // out_w + end_w = jnp.minimum( + ((jnp.arange(out_w, dtype=jnp.int32) + 1) * w + out_w - 1) // out_w, + w, + ) + pooled = jnp.zeros((n, out_h, out_w, c), 
dtype=inputs.dtype) + for i in range(out_h): + sh = start_h[i] + eh = end_h[i] + for j in range(out_w): + sw = start_w[j] + ew = end_w[j] + region = inputs[:, sh:eh, sw:ew, :] + pooled = pooled.at[:, i, j, :].set(jnp.max(region, axis=(1, 2))) + if data_format == "channels_first": + pooled = jnp.transpose(pooled, (0, 3, 1, 2)) # NHWC -> NCHW + return pooled diff --git a/keras/src/backend/numpy/nn.py b/keras/src/backend/numpy/nn.py index ed2ac094fef3..d9034aa5da28 100644 --- a/keras/src/backend/numpy/nn.py +++ b/keras/src/backend/numpy/nn.py @@ -1241,13 +1241,11 @@ def _pair(x): def _adaptive_pool2d(inputs, output_size, mode="avg", data_format=None): """Adaptive pooling for 2D inputs.""" - from keras.src import backend - data_format = backend.standardize_data_format(data_format) x = convert_to_tensor(inputs) if isinstance(output_size, int): - out_h = out_w = int(output_size) + out_h = out_w = output_size else: out_h, out_w = output_size @@ -1258,22 +1256,25 @@ def _adaptive_pool2d(inputs, output_size, mode="avg", data_format=None): N, C, H, W = x.shape x_nchw = x + # Precompute start and end indices using integer arithmetic + h_starts = np.array([i * H // out_h for i in range(out_h)], dtype=int) + h_ends = np.array( + [min(((i + 1) * H + out_h - 1) // out_h, H) for i in range(out_h)], + dtype=int, + ) + w_starts = np.array([j * W // out_w for j in range(out_w)], dtype=int) + w_ends = np.array( + [min(((j + 1) * W + out_w - 1) // out_w, W) for j in range(out_w)], + dtype=int, + ) + out = np.empty((N, C, out_h, out_w), dtype=x.dtype) for i in range(out_h): - h_start = int(np.floor(i * H / out_h)) - h_end = int(np.ceil((i + 1) * H / out_h)) - h_start = max(0, min(h_start, H - 1)) - h_end = max(h_start + 1, min(h_end, H)) - for j in range(out_w): - w_start = int(np.floor(j * W / out_w)) - w_end = int(np.ceil((j + 1) * W / out_w)) - w_start = max(0, min(w_start, W - 1)) - w_end = max(w_start + 1, min(w_end, W)) - - patch = x_nchw[:, :, h_start:h_end, w_start:w_end] - + patch = x_nchw[ + :, :, h_starts[i] : h_ends[i], w_starts[j] : w_ends[j] + ] if mode == "avg": out[:, :, i, j] = np.mean(patch, axis=(2, 3)) else: diff --git a/keras/src/backend/openvino/nn.py b/keras/src/backend/openvino/nn.py index 2d6daedd18c0..88b8b746a875 100644 --- a/keras/src/backend/openvino/nn.py +++ b/keras/src/backend/openvino/nn.py @@ -136,8 +136,8 @@ def max_pool( def adaptive_max_pool(inputs, output_size, data_format=None): """Adaptive max pooling - OpenVINO backend not yet supported.""" raise NotImplementedError( - "adaptive_max_pool is not yet supported for OpenVINO backend. " - "Please use JAX, NumPy, PyTorch, or TensorFlow backend." + "Adaptive pooling not implemented for OpenVINO. " + "Use JAX or Torch backend." ) @@ -156,8 +156,8 @@ def average_pool( def adaptive_avg_pool(inputs, output_size, data_format=None): """Adaptive average pooling - OpenVINO backend not yet supported.""" raise NotImplementedError( - "adaptive_avg_pool is not yet supported for OpenVINO backend. " - "Please use JAX, NumPy, PyTorch, or TensorFlow backend." + "Adaptive pooling not implemented for OpenVINO. " + "Use JAX or Torch backend." 
     )
 
 
diff --git a/keras/src/backend/tensorflow/nn.py b/keras/src/backend/tensorflow/nn.py
index a435cf847264..cc86cd23c358 100644
--- a/keras/src/backend/tensorflow/nn.py
+++ b/keras/src/backend/tensorflow/nn.py
@@ -241,46 +241,11 @@ def max_pool(
 
 
 def adaptive_max_pool(inputs, output_size, data_format=None):
-    """Adaptive max pooling 2D for TensorFlow backend."""
-    import tensorflow as tf
-
-    from keras.src import backend
-
-    data_format = backend.standardize_data_format(data_format)
-    x = tf.convert_to_tensor(inputs)
-
-    if isinstance(output_size, int):
-        out_h = out_w = int(output_size)
-    else:
-        out_h, out_w = output_size
-
-    if data_format == "channels_last":
-        N, H, W, C = x.shape
-        x_nchw = tf.transpose(x, [0, 3, 1, 2])
-    else:
-        N, C, H, W = x.shape
-        x_nchw = x
-
-    result_list = []
-    for i in range(out_h):
-        for j in range(out_w):
-            h_start = int(tf.math.floor(i * H / out_h))
-            h_end = int(tf.math.ceil((i + 1) * H / out_h))
-            w_start = int(tf.math.floor(j * W / out_w))
-            w_end = int(tf.math.ceil((j + 1) * W / out_w))
-
-            patch = x_nchw[:, :, h_start:h_end, w_start:w_end]
-            pooled = tf.reduce_max(patch, axis=[2, 3])
-            result_list.append(pooled)
-
-    output = tf.stack(result_list, axis=0)
-    output = tf.reshape(output, [out_h, out_w, N, C])
-    output = tf.transpose(
-        output, [2, 0, 1, 3] if data_format == "channels_last" else [2, 3, 0, 1]
+    raise NotImplementedError(
+        "Adaptive pooling not implemented for TensorFlow. "
+        "Use JAX or Torch backend."
     )
 
-    return output
-
 
 def average_pool(
     inputs,
     pool_size,
@@ -311,46 +276,11 @@ def average_pool(
     return outputs
 
 
 def adaptive_avg_pool(inputs, output_size, data_format=None):
-    """Adaptive average pooling 2D for TensorFlow backend."""
-    import tensorflow as tf
-
-    from keras.src import backend
-
-    data_format = backend.standardize_data_format(data_format)
-    x = tf.convert_to_tensor(inputs)
-
-    if isinstance(output_size, int):
-        out_h = out_w = int(output_size)
-    else:
-        out_h, out_w = output_size
-
-    if data_format == "channels_last":
-        N, H, W, C = x.shape
-        x_nchw = tf.transpose(x, [0, 3, 1, 2])
-    else:
-        N, C, H, W = x.shape
-        x_nchw = x
-
-    result_list = []
-    for i in range(out_h):
-        for j in range(out_w):
-            h_start = int(tf.math.floor(i * H / out_h))
-            h_end = int(tf.math.ceil((i + 1) * H / out_h))
-            w_start = int(tf.math.floor(j * W / out_w))
-            w_end = int(tf.math.ceil((j + 1) * W / out_w))
-
-            patch = x_nchw[:, :, h_start:h_end, w_start:w_end]
-            pooled = tf.reduce_mean(patch, axis=[2, 3])
-            result_list.append(pooled)
-
-    output = tf.stack(result_list, axis=0)
-    output = tf.reshape(output, [out_h, out_w, N, C])
-    output = tf.transpose(
-        output, [2, 0, 1, 3] if data_format == "channels_last" else [2, 3, 0, 1]
+    raise NotImplementedError(
+        "Adaptive pooling not implemented for TensorFlow. "
+        "Use JAX or Torch backend."
     )
 
-    return output
-
 
 def _convert_data_format(data_format, ndim):
     if data_format == "channels_last":
         if ndim == 3:
diff --git a/keras/src/layers/pooling/adaptive_pooling2d_test.py b/keras/src/layers/pooling/adaptive_pooling2d_test.py
index f85ce0ec568f..d88ecafe9a8b 100644
--- a/keras/src/layers/pooling/adaptive_pooling2d_test.py
+++ b/keras/src/layers/pooling/adaptive_pooling2d_test.py
@@ -175,3 +175,59 @@ def test_adaptive_max_pooling2d_matches_torch(output_size):
     np.testing.assert_allclose(
         y_keras_np, y_torch.numpy(), rtol=1e-5, atol=1e-5
     )
+
+
+@pytest.mark.parametrize("output_size", [(4, 4), (7, 7), (1, 1)])
+@pytest.mark.parametrize("input_shape", [(2, 3, 8, 8), (4, 64, 224, 224)])
+def test_adaptive_avg_pool_numerical_equivalence(input_shape, output_size):
+    """Test numerical equivalence with PyTorch across multiple shapes."""
+    # Set seed for reproducibility
+    np.random.seed(42)
+    torch.manual_seed(42)
+
+    x_np = np.random.randn(*input_shape).astype(np.float32)
+
+    # PyTorch reference
+    x_torch = torch.tensor(x_np)
+    y_torch = torch.nn.functional.adaptive_avg_pool2d(x_torch, output_size)
+    y_torch_np = y_torch.detach().cpu().numpy()
+
+    # Keras/JAX
+    from keras.src import ops
+
+    x_keras = ops.convert_to_tensor(x_np)
+    y_keras = ops.adaptive_avg_pool(
+        x_keras, output_size=output_size, data_format="channels_first"
+    )
+    y_keras_np = np.array(y_keras)
+
+    # Compare with appropriate tolerance for float32
+    np.testing.assert_allclose(y_keras_np, y_torch_np, rtol=1e-5, atol=1e-5)
+
+
+@pytest.mark.parametrize("output_size", [(4, 4), (7, 7), (1, 1)])
+@pytest.mark.parametrize("input_shape", [(2, 3, 8, 8), (4, 64, 224, 224)])
+def test_adaptive_max_pool_numerical_equivalence(input_shape, output_size):
+    """Test numerical equivalence with PyTorch across multiple shapes."""
+    # Set seed for reproducibility
+    np.random.seed(42)
+    torch.manual_seed(42)
+
+    x_np = np.random.randn(*input_shape).astype(np.float32)
+
+    # PyTorch reference
+    x_torch = torch.tensor(x_np)
+    y_torch = torch.nn.functional.adaptive_max_pool2d(x_torch, output_size)
+    y_torch_np = y_torch.detach().cpu().numpy()
+
+    # Keras/JAX
+    from keras.src import ops
+
+    x_keras = ops.convert_to_tensor(x_np)
+    y_keras = ops.adaptive_max_pool(
+        x_keras, output_size=output_size, data_format="channels_first"
+    )
+    y_keras_np = np.array(y_keras)
+
+    # Compare with appropriate tolerance for float32
+    np.testing.assert_allclose(y_keras_np, y_torch_np, rtol=1e-5, atol=1e-5)
diff --git a/keras/src/layers/pooling/benchmark_adaptive_pooling.py b/keras/src/layers/pooling/benchmark_adaptive_pooling.py
new file mode 100644
index 000000000000..778c3fde5345
--- /dev/null
+++ b/keras/src/layers/pooling/benchmark_adaptive_pooling.py
@@ -0,0 +1,95 @@
+# MUST be set BEFORE any imports
+import os
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+os.environ["KERAS_BACKEND"] = "jax"  # choose 'jax' or set externally
+os.environ["JAX_PLATFORMS"] = "cpu"  # or 'gpu' if configured
+
+import time
+
+import jax.numpy as jnp
+import numpy as np
+
+# Library imports must be after env vars above
+import torch
+
+from keras.src.backend.jax.nn import adaptive_avg_pool as jax_adaptive_avg_pool
+
+# Test configurations
+test_cases = [
+    (32, 3, 64, 64, 4, 4),  # Small
+    (32, 3, 224, 224, 7, 7),  # Medium (ImageNet)
+    (32, 3, 512, 512, 14, 14),  # Large
+]
+
+print("=" * 80)
+print("🔥 Adaptive Average Pooling Benchmark")
+print("=" * 80)
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"PyTorch device: {device.upper()}")
+print(f"JAX platform: {os.environ.get('JAX_PLATFORMS')}")
+print("-" * 80)
+
+for batch_size, channels, input_h, input_w, output_h, output_w in test_cases:
+    print(f"\nInput: {input_h}x{input_w} → Output: {output_h}x{output_w}")
+    print(f"Batch: {batch_size}, Channels: {channels}")
+    print("-" * 70)
+
+    x_np = np.random.randn(batch_size, channels, input_h, input_w).astype(
+        np.float32
+    )
+
+    output_size = (output_h, output_w)
+
+    # --- PyTorch benchmark ---
+    try:
+        x_torch = torch.tensor(x_np, device=device)
+        # Warmup
+        for _ in range(5):
+            _ = torch.nn.functional.adaptive_avg_pool2d(x_torch, output_size)
+        if device == "cuda":
+            torch.cuda.synchronize()
+
+        # Benchmark
+        start = time.perf_counter()
+        for _ in range(50):
+            y_torch = torch.nn.functional.adaptive_avg_pool2d(
+                x_torch,
+                output_size,
+            )
+        if device == "cuda":
+            torch.cuda.synchronize()
+        torch_time = (time.perf_counter() - start) / 50 * 1000
+        print(f"  PyTorch:      {torch_time:.4f} ms")
+    except Exception as e:
+        print(f"  PyTorch:      Error - {str(e)[:60]}")
+
+    # --- JAX benchmark ---
+    try:
+        x_jax = jnp.array(x_np)
+        # Warmup
+        for _ in range(5):
+            jax_adaptive_avg_pool(
+                x_jax,
+                output_size,
+                data_format="channels_first",
+            ).block_until_ready()
+
+        # Benchmark
+        start = time.perf_counter()
+        for _ in range(50):
+            jax_adaptive_avg_pool(
+                x_jax,
+                output_size,
+                data_format="channels_first",
+            ).block_until_ready()
+        jax_time = (time.perf_counter() - start) / 50 * 1000
+        print(f"  JAX (Keras):  {jax_time:.4f} ms")
+    except Exception as e:
+        print(f"  JAX (Keras):  Error - {str(e)[:60]}")
+
+print("\n" + "=" * 80)
+print("✅ Benchmark complete!")
+print("=" * 80)
diff --git a/keras/src/layers/pooling/test_training_adaptive_pooling.py b/keras/src/layers/pooling/test_training_adaptive_pooling.py
new file mode 100644
index 000000000000..a00ef54f6762
--- /dev/null
+++ b/keras/src/layers/pooling/test_training_adaptive_pooling.py
@@ -0,0 +1,95 @@
+import os
+
+os.environ["KERAS_BACKEND"] = "torch"
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+
+import time
+
+import numpy as np
+import torch
+
+import keras
+from keras.src import layers
+from keras.src import models
+
+print("=" * 80)
+print("🚀 Real GPU Training Test with Adaptive Pooling (Torch Backend)")
+print("=" * 80)
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"💻 Running on: {device.upper()}")
+if device == "cuda":
+    print(f"🔥 GPU: {torch.cuda.get_device_name(0)}")
+print(f"🔧 Backend: {keras.backend.backend()}")
+print(f"📦 Keras Version: {keras.__version__}")
+print(f"🧠 Torch Version: {torch.__version__}")
+
+np.random.seed(42)
+x_train = np.random.randn(1000, 32, 32, 3).astype(np.float32)
+y_train = np.random.randint(0, 10, 1000)
+x_val = np.random.randn(200, 32, 32, 3).astype(np.float32)
+y_val = np.random.randint(0, 10, 200)
+
+
+def make_model(pool_type="avg"):
+    pool_layer = (
+        layers.AdaptiveAveragePooling2D((4, 4))
+        if pool_type == "avg"
+        else layers.AdaptiveMaxPooling2D((4, 4))
+    )
+    return models.Sequential(
+        [
+            layers.Input(shape=(32, 32, 3)),
+            layers.Conv2D(32, 3, activation="relu", padding="same"),
+            layers.BatchNormalization(),
+            layers.Conv2D(64, 3, activation="relu", padding="same"),
+            pool_layer,
+            layers.Flatten(),
+            layers.Dense(128, activation="relu"),
+            layers.Dropout(0.5),
+            layers.Dense(10, activation="softmax"),
+        ]
+    )
+
+
+for pool in ["avg", "max"]:
+    print("\n" + "=" * 80)
+    print(f"🔹 Training Model with Adaptive{pool.capitalize()}Pooling2D")
+    print("=" * 80)
+
+    model = make_model(pool)
+    model.compile(
+        optimizer="adam",
loss="sparse_categorical_crossentropy", + metrics=["accuracy"], + ) + + print("\n🧠 Model Summary:") + model.summary() + + start = time.time() + history = model.fit( + x_train, + y_train, + validation_data=(x_val, y_val), + epochs=3, + batch_size=32, + verbose=2, + ) + elapsed = time.time() - start + + print(f"\n✅ {pool.capitalize()}Pooling2D Training Done") + print(f"⏱️ Training time: {elapsed:.2f}s") + print(f"📈 Final training accuracy: {history.history['accuracy'][-1]:.4f}") + print( + "📊 Final validation accuracy: " + f"{history.history['val_accuracy'][-1]:.4f}" + ) + + test_input = np.random.randn(1, 32, 32, 3).astype(np.float32) + preds = model.predict(test_input, verbose=0) + print(f"✓ Inference OK - Output shape: {preds.shape}") + +print("\n" + "=" * 80) +print("🏁 All Adaptive Pooling Tests Completed Successfully on Torch GPU") +print("=" * 80) From 323a1ab5ea9424876fcf7b952ace7bbb7c065632 Mon Sep 17 00:00:00 2001 From: Malyala Karthik Date: Fri, 7 Nov 2025 13:53:00 +0530 Subject: [PATCH 04/13] Fix adaptive pooling implementation --- .../layers/pooling/adaptive_pooling2d_test.py | 108 +++++------------- .../pooling/test_training_adaptive_pooling.py | 45 +++++--- 2 files changed, 63 insertions(+), 90 deletions(-) diff --git a/keras/src/layers/pooling/adaptive_pooling2d_test.py b/keras/src/layers/pooling/adaptive_pooling2d_test.py index d88ecafe9a8b..79850fada1c6 100644 --- a/keras/src/layers/pooling/adaptive_pooling2d_test.py +++ b/keras/src/layers/pooling/adaptive_pooling2d_test.py @@ -1,4 +1,4 @@ -"""Tests for Adaptive Average Pooling 2D layer.""" +"""Tests for Adaptive Average and Max Pooling 2D layers.""" import numpy as np import pytest @@ -7,7 +7,6 @@ from keras.src import ops from keras.src import testing -# Only import torch if available try: import torch @@ -20,16 +19,20 @@ class AdaptiveAveragePooling2DTest(testing.TestCase): """Test suite for AdaptiveAveragePooling2D layer.""" def test_adaptive_avg_pooling_2d_basic(self): - """Test basic functionality with square output.""" - layer = layers.AdaptiveAveragePooling2D(output_size=4) - x = np.random.randn(2, 8, 8, 3).astype("float32") + """Test basic functionality with square output, channels_last.""" + layer = layers.AdaptiveAveragePooling2D( + output_size=4, data_format="channels_last" + ) + x = np.random.randn(2, 8, 8, 3).astype("float32") # NHWC y = layer(x) self.assertEqual(y.shape, (2, 4, 4, 3)) def test_adaptive_avg_pooling_2d_rectangular(self): - """Test with rectangular output size.""" - layer = layers.AdaptiveAveragePooling2D(output_size=(2, 4)) - x = np.random.randn(2, 8, 8, 3).astype("float32") + """Test with rectangular output size, channels_last.""" + layer = layers.AdaptiveAveragePooling2D( + output_size=(2, 4), data_format="channels_last" + ) + x = np.random.randn(2, 8, 8, 3).astype("float32") # NHWC y = layer(x) self.assertEqual(y.shape, (2, 2, 4, 3)) @@ -38,13 +41,15 @@ def test_adaptive_avg_pooling_2d_channels_first(self): layer = layers.AdaptiveAveragePooling2D( output_size=4, data_format="channels_first" ) - x = np.random.randn(2, 3, 8, 8).astype("float32") + x = np.random.randn(2, 3, 8, 8).astype("float32") # NCHW y = layer(x) self.assertEqual(y.shape, (2, 3, 4, 4)) def test_adaptive_avg_pooling_2d_output_shape(self): """Test compute_output_shape method.""" - layer = layers.AdaptiveAveragePooling2D(output_size=(2, 4)) + layer = layers.AdaptiveAveragePooling2D( + output_size=(2, 4), data_format="channels_last" + ) x_shape = (2, 8, 8, 3) output_shape = layer.compute_output_shape(x_shape) 
self.assertEqual(output_shape, (2, 2, 4, 3)) @@ -82,16 +87,20 @@ class AdaptiveMaxPooling2DTest(testing.TestCase): """Test suite for AdaptiveMaxPooling2D layer.""" def test_adaptive_max_pooling_2d_basic(self): - """Test basic functionality with square output.""" - layer = layers.AdaptiveMaxPooling2D(output_size=4) - x = np.random.randn(2, 8, 8, 3).astype("float32") + """Test basic functionality with square output, channels_last.""" + layer = layers.AdaptiveMaxPooling2D( + output_size=4, data_format="channels_last" + ) + x = np.random.randn(2, 8, 8, 3).astype("float32") # NHWC y = layer(x) self.assertEqual(y.shape, (2, 4, 4, 3)) def test_adaptive_max_pooling_2d_rectangular(self): - """Test with rectangular output size.""" - layer = layers.AdaptiveMaxPooling2D(output_size=(3, 5)) - x = np.random.randn(2, 9, 15, 3).astype("float32") + """Test with rectangular output size, channels_last.""" + layer = layers.AdaptiveMaxPooling2D( + output_size=(3, 5), data_format="channels_last" + ) + x = np.random.randn(2, 9, 15, 3).astype("float32") # NHWC y = layer(x) self.assertEqual(y.shape, (2, 3, 5, 3)) @@ -100,13 +109,15 @@ def test_adaptive_max_pooling_2d_channels_first(self): layer = layers.AdaptiveMaxPooling2D( output_size=4, data_format="channels_first" ) - x = np.random.randn(2, 3, 8, 8).astype("float32") + x = np.random.randn(2, 3, 8, 8).astype("float32") # NCHW y = layer(x) self.assertEqual(y.shape, (2, 3, 4, 4)) def test_adaptive_max_pooling_2d_output_shape(self): """Test compute_output_shape method.""" - layer = layers.AdaptiveMaxPooling2D(output_size=(3, 5)) + layer = layers.AdaptiveMaxPooling2D( + output_size=(3, 5), data_format="channels_last" + ) x_shape = (2, 9, 15, 3) output_shape = layer.compute_output_shape(x_shape) self.assertEqual(output_shape, (2, 3, 5, 3)) @@ -126,14 +137,13 @@ def test_adaptive_max_pooling_2d_get_config(self): self.assertEqual(new_layer.data_format, "channels_first") -# Parameterized tests as standalone functions (OUTSIDE classes) @pytest.mark.skipif(not TORCH_AVAILABLE, reason="PyTorch not installed") @pytest.mark.parametrize( "output_size", [(4, 4), (2, 2), (3, 5), (1, 1), (7, 9)] ) def test_adaptive_avg_pooling2d_matches_torch(output_size): """Test numerical accuracy against PyTorch implementation.""" - x_np = np.random.randn(2, 3, 8, 8).astype(np.float32) + x_np = np.random.randn(2, 3, 8, 8).astype(np.float32) # NCHW # PyTorch x_torch = torch.tensor(x_np) @@ -158,7 +168,7 @@ def test_adaptive_avg_pooling2d_matches_torch(output_size): ) def test_adaptive_max_pooling2d_matches_torch(output_size): """Test numerical accuracy against PyTorch implementation.""" - x_np = np.random.randn(2, 3, 8, 8).astype(np.float32) + x_np = np.random.randn(2, 3, 8, 8).astype(np.float32) # NCHW # PyTorch x_torch = torch.tensor(x_np) @@ -175,59 +185,3 @@ def test_adaptive_max_pooling2d_matches_torch(output_size): np.testing.assert_allclose( y_keras_np, y_torch.numpy(), rtol=1e-5, atol=1e-5 ) - - -@pytest.mark.parametrize("output_size", [(4, 4), (7, 7), (1, 1)]) -@pytest.mark.parametrize("input_shape", [(2, 3, 8, 8), (4, 64, 224, 224)]) -def test_adaptive_avg_pool_numerical_equivalence(input_shape, output_size): - """Test numerical equivalence with PyTorch across multiple shapes.""" - # Set seed for reproducibility - np.random.seed(42) - torch.manual_seed(42) - - x_np = np.random.randn(*input_shape).astype(np.float32) - - # PyTorch reference - x_torch = torch.tensor(x_np) - y_torch = torch.nn.functional.adaptive_avg_pool2d(x_torch, output_size) - y_torch_np = 
y_torch.detach().cpu().numpy() - - # Keras/JAX - from keras.src import ops - - x_keras = ops.convert_to_tensor(x_np) - y_keras = ops.adaptive_avg_pool( - x_keras, output_size=output_size, data_format="channels_first" - ) - y_keras_np = np.array(y_keras) - - # Compare with appropriate tolerance for float32 - np.testing.assert_allclose(y_keras_np, y_torch_np, rtol=1e-5, atol=1e-5) - - -@pytest.mark.parametrize("output_size", [(4, 4), (7, 7), (1, 1)]) -@pytest.mark.parametrize("input_shape", [(2, 3, 8, 8), (4, 64, 224, 224)]) -def test_adaptive_max_pool_numerical_equivalence(input_shape, output_size): - """Test numerical equivalence with PyTorch across multiple shapes.""" - # Set seed for reproducibility - np.random.seed(42) - torch.manual_seed(42) - - x_np = np.random.randn(*input_shape).astype(np.float32) - - # PyTorch reference - x_torch = torch.tensor(x_np) - y_torch = torch.nn.functional.adaptive_max_pool2d(x_torch, output_size) - y_torch_np = y_torch.detach().cpu().numpy() - - # Keras/JAX - from keras.src import ops - - x_keras = ops.convert_to_tensor(x_np) - y_keras = ops.adaptive_max_pool( - x_keras, output_size=output_size, data_format="channels_first" - ) - y_keras_np = np.array(y_keras) - - # Compare with appropriate tolerance for float32 - np.testing.assert_allclose(y_keras_np, y_torch_np, rtol=1e-5, atol=1e-5) diff --git a/keras/src/layers/pooling/test_training_adaptive_pooling.py b/keras/src/layers/pooling/test_training_adaptive_pooling.py index a00ef54f6762..089359f7cb72 100644 --- a/keras/src/layers/pooling/test_training_adaptive_pooling.py +++ b/keras/src/layers/pooling/test_training_adaptive_pooling.py @@ -1,6 +1,6 @@ import os -os.environ["KERAS_BACKEND"] = "torch" +os.environ["KERAS_BACKEND"] = "torch" # Force Torch backend os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" import time @@ -9,40 +9,59 @@ import torch import keras +from keras.src import backend as K from keras.src import layers from keras.src import models +# Skip if not Torch +if K.backend() != "torch": + print(f"⚠️ Skipping: Torch backend required, current backend={K.backend()}") + exit(0) + print("=" * 80) -print("🚀 Real GPU Training Test with Adaptive Pooling (Torch Backend)") +print("🚀 Torch GPU Adaptive Pooling Training Test") print("=" * 80) device = "cuda" if torch.cuda.is_available() else "cpu" print(f"💻 Running on: {device.upper()}") if device == "cuda": print(f"🔥 GPU: {torch.cuda.get_device_name(0)}") -print(f"🔧 Backend: {keras.backend.backend()}") +print(f"🔧 Backend: {K.backend()}") print(f"📦 Keras Version: {keras.__version__}") print(f"🧠 Torch Version: {torch.__version__}") +# Data in channels-first format np.random.seed(42) -x_train = np.random.randn(1000, 32, 32, 3).astype(np.float32) +x_train = np.random.randn(1000, 3, 32, 32).astype(np.float32) y_train = np.random.randint(0, 10, 1000) -x_val = np.random.randn(200, 32, 32, 3).astype(np.float32) +x_val = np.random.randn(200, 3, 32, 32).astype(np.float32) y_val = np.random.randint(0, 10, 200) def make_model(pool_type="avg"): pool_layer = ( - layers.AdaptiveAveragePooling2D((4, 4)) + layers.AdaptiveAveragePooling2D((4, 4), data_format="channels_first") if pool_type == "avg" - else layers.AdaptiveMaxPooling2D((4, 4)) + else layers.AdaptiveMaxPooling2D((4, 4), data_format="channels_first") ) return models.Sequential( [ - layers.Input(shape=(32, 32, 3)), - layers.Conv2D(32, 3, activation="relu", padding="same"), - layers.BatchNormalization(), - layers.Conv2D(64, 3, activation="relu", padding="same"), + layers.Input(shape=(3, 32, 32)), + layers.Conv2D( + 32, 
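+                # 32 filters; the bare 3 below is shorthand for a 3x3 kernel.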
+ 3, + activation="relu", + padding="same", + data_format="channels_first", + ), + layers.BatchNormalization(axis=1), + layers.Conv2D( + 64, + 3, + activation="relu", + padding="same", + data_format="channels_first", + ), pool_layer, layers.Flatten(), layers.Dense(128, activation="relu"), @@ -82,11 +101,11 @@ def make_model(pool_type="avg"): print(f"⏱️ Training time: {elapsed:.2f}s") print(f"📈 Final training accuracy: {history.history['accuracy'][-1]:.4f}") print( - "📊 Final validation accuracy: " + f"📊 Final validation accuracy: " f"{history.history['val_accuracy'][-1]:.4f}" ) - test_input = np.random.randn(1, 32, 32, 3).astype(np.float32) + test_input = np.random.randn(1, 3, 32, 32).astype(np.float32) preds = model.predict(test_input, verbose=0) print(f"✓ Inference OK - Output shape: {preds.shape}") From df5722741e1ce01301ce0dc4f6f22a9d0e0c0ddc Mon Sep 17 00:00:00 2001 From: Malyala Karthik Date: Fri, 7 Nov 2025 14:24:41 +0530 Subject: [PATCH 05/13] Fix adaptive pooling implementation --- .../pooling/test_training_adaptive_pooling.py | 96 +++++-------------- 1 file changed, 24 insertions(+), 72 deletions(-) diff --git a/keras/src/layers/pooling/test_training_adaptive_pooling.py b/keras/src/layers/pooling/test_training_adaptive_pooling.py index 089359f7cb72..7cdf5cd1b042 100644 --- a/keras/src/layers/pooling/test_training_adaptive_pooling.py +++ b/keras/src/layers/pooling/test_training_adaptive_pooling.py @@ -1,67 +1,30 @@ -import os - -os.environ["KERAS_BACKEND"] = "torch" # Force Torch backend -os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" - -import time - +# File: keras/src/layers/pooling/test_training_adaptive_pooling.py import numpy as np -import torch +import pytest -import keras from keras.src import backend as K from keras.src import layers from keras.src import models -# Skip if not Torch -if K.backend() != "torch": - print(f"⚠️ Skipping: Torch backend required, current backend={K.backend()}") - exit(0) - -print("=" * 80) -print("🚀 Torch GPU Adaptive Pooling Training Test") -print("=" * 80) - -device = "cuda" if torch.cuda.is_available() else "cpu" -print(f"💻 Running on: {device.upper()}") -if device == "cuda": - print(f"🔥 GPU: {torch.cuda.get_device_name(0)}") -print(f"🔧 Backend: {K.backend()}") -print(f"📦 Keras Version: {keras.__version__}") -print(f"🧠 Torch Version: {torch.__version__}") - -# Data in channels-first format np.random.seed(42) -x_train = np.random.randn(1000, 3, 32, 32).astype(np.float32) +x_train = np.random.randn(1000, 32, 32, 3).astype(np.float32) y_train = np.random.randint(0, 10, 1000) -x_val = np.random.randn(200, 3, 32, 32).astype(np.float32) +x_val = np.random.randn(200, 32, 32, 3).astype(np.float32) y_val = np.random.randint(0, 10, 200) def make_model(pool_type="avg"): pool_layer = ( - layers.AdaptiveAveragePooling2D((4, 4), data_format="channels_first") + layers.AdaptiveAveragePooling2D((4, 4)) if pool_type == "avg" - else layers.AdaptiveMaxPooling2D((4, 4), data_format="channels_first") + else layers.AdaptiveMaxPooling2D((4, 4)) ) return models.Sequential( [ - layers.Input(shape=(3, 32, 32)), - layers.Conv2D( - 32, - 3, - activation="relu", - padding="same", - data_format="channels_first", - ), - layers.BatchNormalization(axis=1), - layers.Conv2D( - 64, - 3, - activation="relu", - padding="same", - data_format="channels_first", - ), + layers.Input(shape=(32, 32, 3)), + layers.Conv2D(32, 3, activation="relu", padding="same"), + layers.BatchNormalization(), + layers.Conv2D(64, 3, activation="relu", padding="same"), pool_layer, layers.Flatten(), 
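            # The 4x4 adaptive pool makes Flatten's output size independent
            # of the input resolution, which is the point of adaptive pooling.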
layers.Dense(128, activation="relu"), @@ -71,10 +34,13 @@ def make_model(pool_type="avg"): ) -for pool in ["avg", "max"]: - print("\n" + "=" * 80) - print(f"🔹 Training Model with Adaptive{pool.capitalize()}Pooling2D") - print("=" * 80) +@pytest.mark.parametrize("pool", ["avg", "max"]) +def test_training_adaptive_pooling(pool): + # Skip backends where training is unsupported + if K.backend() in ["numpy", "openvino", "tensorflow", "jax"]: + pytest.skip( + f"fit or adaptive pooling not supported for backend: {K.backend()}" + ) model = make_model(pool) model.compile( @@ -83,32 +49,18 @@ def make_model(pool_type="avg"): metrics=["accuracy"], ) - print("\n🧠 Model Summary:") - model.summary() - - start = time.time() history = model.fit( x_train, y_train, validation_data=(x_val, y_val), - epochs=3, + epochs=1, batch_size=32, - verbose=2, + verbose=0, ) - elapsed = time.time() - start - print(f"\n✅ {pool.capitalize()}Pooling2D Training Done") - print(f"⏱️ Training time: {elapsed:.2f}s") - print(f"📈 Final training accuracy: {history.history['accuracy'][-1]:.4f}") - print( - f"📊 Final validation accuracy: " - f"{history.history['val_accuracy'][-1]:.4f}" + # Basic assertions + assert "accuracy" in history.history + preds = model.predict( + np.random.randn(1, 32, 32, 3).astype(np.float32), verbose=0 ) - - test_input = np.random.randn(1, 3, 32, 32).astype(np.float32) - preds = model.predict(test_input, verbose=0) - print(f"✓ Inference OK - Output shape: {preds.shape}") - -print("\n" + "=" * 80) -print("🏁 All Adaptive Pooling Tests Completed Successfully on Torch GPU") -print("=" * 80) + assert preds.shape == (1, 10) From 5343b715ae99d4dc5ac655b251938b4ba1e11c36 Mon Sep 17 00:00:00 2001 From: Malyala Karthik Date: Fri, 7 Nov 2025 14:41:40 +0530 Subject: [PATCH 06/13] Fix adaptive pooling implementation --- keras/src/layers/pooling/adaptive_pooling2d_test.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/keras/src/layers/pooling/adaptive_pooling2d_test.py b/keras/src/layers/pooling/adaptive_pooling2d_test.py index 79850fada1c6..0f825a858ef7 100644 --- a/keras/src/layers/pooling/adaptive_pooling2d_test.py +++ b/keras/src/layers/pooling/adaptive_pooling2d_test.py @@ -1,4 +1,12 @@ """Tests for Adaptive Average and Max Pooling 2D layers.""" +import pytest +SKIP_BACKENDS = [ "openvino", "tensorflow"] +from keras.src import backend as K + +pytestmark = pytest.mark.skipif( + K.backend() in SKIP_BACKENDS, + reason="Adaptive pooling not implemented for this backend." +) import numpy as np import pytest From 4cc8ac0d17c4a5bb7658941816eaf9c20ff17aa0 Mon Sep 17 00:00:00 2001 From: Malyala Karthik Date: Fri, 7 Nov 2025 14:46:15 +0530 Subject: [PATCH 07/13] Fix adaptive pooling implementation --- .../layers/pooling/adaptive_pooling2d_test.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/keras/src/layers/pooling/adaptive_pooling2d_test.py b/keras/src/layers/pooling/adaptive_pooling2d_test.py index 0f825a858ef7..f12712f8b055 100644 --- a/keras/src/layers/pooling/adaptive_pooling2d_test.py +++ b/keras/src/layers/pooling/adaptive_pooling2d_test.py @@ -1,20 +1,21 @@ """Tests for Adaptive Average and Max Pooling 2D layers.""" -import pytest -SKIP_BACKENDS = [ "openvino", "tensorflow"] -from keras.src import backend as K - -pytestmark = pytest.mark.skipif( - K.backend() in SKIP_BACKENDS, - reason="Adaptive pooling not implemented for this backend." 
-) import numpy as np import pytest +from keras.src import backend as K from keras.src import layers from keras.src import ops from keras.src import testing +SKIP_BACKENDS = ["openvino", "tensorflow"] + +pytestmark = pytest.mark.skipif( + K.backend() in SKIP_BACKENDS, + reason=f"Adaptive pooling tests not supported for backend: {K.backend()}", +) + + try: import torch From 12edcb4d5c59724171af4ea134017d097ed12c9d Mon Sep 17 00:00:00 2001 From: Malyala Karthik Date: Sat, 8 Nov 2025 23:11:06 +0530 Subject: [PATCH 08/13] Fix adaptive pooling implementation --- keras/src/backend/jax/nn.py | 425 ++++++++++++--- keras/src/backend/numpy/nn.py | 60 --- keras/src/backend/tensorflow/nn.py | 497 +++++++++++++++++- keras/src/layers/__init__.py | 8 + keras/src/layers/pooling/__init__.py | 8 + .../pooling/adaptive_average_pooling1d.py | 84 +++ .../pooling/adaptive_average_pooling3d.py | 118 +++++ .../layers/pooling/adaptive_max_pooling1d.py | 84 +++ .../layers/pooling/adaptive_max_pooling3d.py | 115 ++++ .../layers/pooling/adaptive_pooling1d_test.py | 93 ++++ .../layers/pooling/adaptive_pooling2d_test.py | 177 ++----- .../layers/pooling/adaptive_pooling3d_test.py | 93 ++++ .../pooling/benchmark_adaptive_pooling.py | 71 +-- .../pooling/test_training_adaptive_pooling.py | 2 +- 14 files changed, 1517 insertions(+), 318 deletions(-) create mode 100644 keras/src/layers/pooling/adaptive_average_pooling1d.py create mode 100644 keras/src/layers/pooling/adaptive_average_pooling3d.py create mode 100644 keras/src/layers/pooling/adaptive_max_pooling1d.py create mode 100644 keras/src/layers/pooling/adaptive_max_pooling3d.py create mode 100644 keras/src/layers/pooling/adaptive_pooling1d_test.py create mode 100644 keras/src/layers/pooling/adaptive_pooling3d_test.py diff --git a/keras/src/backend/jax/nn.py b/keras/src/backend/jax/nn.py index e73e53ec100c..d21e41b86a0b 100644 --- a/keras/src/backend/jax/nn.py +++ b/keras/src/backend/jax/nn.py @@ -1466,99 +1466,366 @@ def _pair(x): return patches.reshape(N, CKK, oH * oW) -def adaptive_avg_pool( - inputs, output_size, data_format="channels_first", name=None -): +def get_static_window_sizes(input_dim, output_dim): + """Calculate small and big window sizes for adaptive pooling.""" + small_window = math.ceil(input_dim / output_dim) + big_window = small_window + 1 + return small_window, big_window + + +def compute_static_gather_indices(input_dim, output_size, big_window): + """Compute gather indices for Two-Pool Gather method.""" + window_starts = jnp.floor( + (jnp.arange(output_size) * input_dim) / output_size + ).astype(jnp.int32) + + window_ends = jnp.ceil( + (jnp.arange(1, output_size + 1) * input_dim) / output_size + ).astype(jnp.int32) + + window_sizes = window_ends - window_starts + is_big_window = window_sizes == big_window + + small_window = big_window - 1 + small_pool_len = input_dim - small_window + 1 + + small_indices = window_starts + big_indices = window_starts + small_pool_len + + gather_indices = jnp.where(is_big_window, big_indices, small_indices) + return gather_indices.astype(jnp.int32) + + +# ---------- 1D Adaptive Pooling ---------- +def adaptive_avg_pool1d(inputs, output_size, data_format="channels_first"): + """Adaptive Average Pooling 1D using Two-Pool Gather method.""" + if isinstance(output_size, int): + output_size = (output_size,) + + if data_format == "channels_first": + inputs = jnp.transpose(inputs, (0, 2, 1)) # NCL -> NLC + + n, l, c = inputs.shape + out_l = output_size[0] + + small_l, big_l = get_static_window_sizes(l, out_l) + gather_l = 
compute_static_gather_indices(l, out_l, big_l) + + small_pool_l = lax.reduce_window( + inputs, 0.0, lax.add, (1, small_l, 1), (1, 1, 1), "valid" + ) + small_pool_l = small_pool_l / small_l + + big_pool_l = lax.reduce_window( + inputs, 0.0, lax.add, (1, big_l, 1), (1, 1, 1), "valid" + ) + big_pool_l = big_pool_l / big_l + + combined_l = jnp.concatenate([small_pool_l, big_pool_l], axis=1) + pooled_l = jnp.take(combined_l, gather_l, axis=1) + + if data_format == "channels_first": + pooled_l = jnp.transpose(pooled_l, (0, 2, 1)) # NLC -> NCL + + return pooled_l + + +def adaptive_max_pool1d(inputs, output_size, data_format="channels_first"): + """Adaptive Max Pooling 1D using Two-Pool Gather method.""" + if isinstance(output_size, int): + output_size = (output_size,) + + if data_format == "channels_first": + inputs = jnp.transpose(inputs, (0, 2, 1)) # NCL -> NLC + + n, l, c = inputs.shape + out_l = output_size[0] + + small_l, big_l = get_static_window_sizes(l, out_l) + gather_l = compute_static_gather_indices(l, out_l, big_l) + + small_pool_l = lax.reduce_window( + inputs, -jnp.inf, lax.max, (1, small_l, 1), (1, 1, 1), "valid" + ) + big_pool_l = lax.reduce_window( + inputs, -jnp.inf, lax.max, (1, big_l, 1), (1, 1, 1), "valid" + ) + + combined_l = jnp.concatenate([small_pool_l, big_pool_l], axis=1) + pooled_l = jnp.take(combined_l, gather_l, axis=1) + + if data_format == "channels_first": + pooled_l = jnp.transpose(pooled_l, (0, 2, 1)) # NLC -> NCL + + return pooled_l + + +# ---------- 2D Adaptive Pooling ---------- +def adaptive_avg_pool2d(inputs, output_size, data_format="channels_first"): + """Adaptive Average Pooling 2D using Two-Pool Gather method.""" if isinstance(output_size, int): output_size = (output_size, output_size) - out_h, out_w = output_size + if data_format == "channels_first": inputs = jnp.transpose(inputs, (0, 2, 3, 1)) # NCHW -> NHWC + n, h, w, c = inputs.shape - if h % out_h == 0 and w % out_w == 0: - kernel_h = h // out_h - kernel_w = w // out_w - stride_h = kernel_h - stride_w = kernel_w - pooled = lax.reduce_window( - inputs, - 0.0, - lax.add, - (1, kernel_h, kernel_w, 1), - (1, stride_h, stride_w, 1), - "VALID", - ) - pooled = pooled / (kernel_h * kernel_w) - else: - start_h = jnp.arange(out_h, dtype=jnp.int32) * h // out_h - end_h = jnp.minimum( - ((jnp.arange(out_h, dtype=jnp.int32) + 1) * h + out_h - 1) // out_h, - h, - ) - start_w = jnp.arange(out_w, dtype=jnp.int32) * w // out_w - end_w = jnp.minimum( - ((jnp.arange(out_w, dtype=jnp.int32) + 1) * w + out_w - 1) // out_w, - w, - ) - pooled = jnp.zeros((n, out_h, out_w, c), dtype=inputs.dtype) - for i in range(out_h): - sh = start_h[i] - eh = end_h[i] - for j in range(out_w): - sw = start_w[j] - ew = end_w[j] - region = inputs[:, sh:eh, sw:ew, :] - pooled = pooled.at[:, i, j, :].set( - jnp.mean(region, axis=(1, 2)) - ) + out_h, out_w = output_size + + small_h, big_h = get_static_window_sizes(h, out_h) + gather_h = compute_static_gather_indices(h, out_h, big_h) + + small_w, big_w = get_static_window_sizes(w, out_w) + gather_w = compute_static_gather_indices(w, out_w, big_w) + + small_pool_h = lax.reduce_window( + inputs, 0.0, lax.add, (1, small_h, 1, 1), (1, 1, 1, 1), "valid" + ) + small_pool_h = small_pool_h / small_h + + big_pool_h = lax.reduce_window( + inputs, 0.0, lax.add, (1, big_h, 1, 1), (1, 1, 1, 1), "valid" + ) + big_pool_h = big_pool_h / big_h + + combined_h = jnp.concatenate([small_pool_h, big_pool_h], axis=1) + pooled_h = jnp.take(combined_h, gather_h, axis=1) + + small_pool_w = lax.reduce_window( + 
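+        # Width pass mirrors the height pass above. Worked example for one
+        # axis, input 5 -> output 3: starts = floor([0,1,2]*5/3) = [0,1,3],
+        # ends = ceil([1,2,3]*5/3) = [2,4,5], sizes = [2,3,2], so small=2,
+        # big=3, small_pool_len = 5-2+1 = 4, and gather = [0, 1+4, 3] picks
+        # from concat(small_pool, big_pool) along the pooled axis.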
pooled_h, 0.0, lax.add, (1, 1, small_w, 1), (1, 1, 1, 1), "valid" + ) + small_pool_w = small_pool_w / small_w + + big_pool_w = lax.reduce_window( + pooled_h, 0.0, lax.add, (1, 1, big_w, 1), (1, 1, 1, 1), "valid" + ) + big_pool_w = big_pool_w / big_w + + combined_w = jnp.concatenate([small_pool_w, big_pool_w], axis=2) + pooled_w = jnp.take(combined_w, gather_w, axis=2) if data_format == "channels_first": - pooled = jnp.transpose(pooled, (0, 3, 1, 2)) # NHWC -> NCHW - return pooled + pooled_w = jnp.transpose(pooled_w, (0, 3, 1, 2)) # NHWC -> NCHW + return pooled_w -def adaptive_max_pool( - inputs, output_size, data_format="channels_first", name=None -): + +def adaptive_max_pool2d(inputs, output_size, data_format="channels_first"): + """Adaptive Max Pooling 2D using Two-Pool Gather method.""" if isinstance(output_size, int): output_size = (output_size, output_size) - out_h, out_w = output_size + if data_format == "channels_first": inputs = jnp.transpose(inputs, (0, 2, 3, 1)) # NCHW -> NHWC + n, h, w, c = inputs.shape - if h % out_h == 0 and w % out_w == 0: - kernel_h = h // out_h - kernel_w = w // out_w - stride_h = kernel_h - stride_w = kernel_w - pooled = lax.reduce_window( - inputs, - -jnp.inf, - lax.max, - (1, kernel_h, kernel_w, 1), - (1, stride_h, stride_w, 1), - "VALID", - ) + out_h, out_w = output_size + + small_h, big_h = get_static_window_sizes(h, out_h) + gather_h = compute_static_gather_indices(h, out_h, big_h) + + small_w, big_w = get_static_window_sizes(w, out_w) + gather_w = compute_static_gather_indices(w, out_w, big_w) + + small_pool_h = lax.reduce_window( + inputs, -jnp.inf, lax.max, (1, small_h, 1, 1), (1, 1, 1, 1), "valid" + ) + big_pool_h = lax.reduce_window( + inputs, -jnp.inf, lax.max, (1, big_h, 1, 1), (1, 1, 1, 1), "valid" + ) + + combined_h = jnp.concatenate([small_pool_h, big_pool_h], axis=1) + pooled_h = jnp.take(combined_h, gather_h, axis=1) + + small_pool_w = lax.reduce_window( + pooled_h, -jnp.inf, lax.max, (1, 1, small_w, 1), (1, 1, 1, 1), "valid" + ) + big_pool_w = lax.reduce_window( + pooled_h, -jnp.inf, lax.max, (1, 1, big_w, 1), (1, 1, 1, 1), "valid" + ) + + combined_w = jnp.concatenate([small_pool_w, big_pool_w], axis=2) + pooled_w = jnp.take(combined_w, gather_w, axis=2) + + if data_format == "channels_first": + pooled_w = jnp.transpose(pooled_w, (0, 3, 1, 2)) # NHWC -> NCHW + + return pooled_w + + +# ---------- 3D Adaptive Pooling ---------- +def adaptive_avg_pool3d(inputs, output_size, data_format="channels_first"): + """Adaptive Average Pooling 3D using Two-Pool Gather method.""" + if isinstance(output_size, int): + output_size = (output_size, output_size, output_size) + + if data_format == "channels_first": + inputs = jnp.transpose(inputs, (0, 2, 3, 4, 1)) # NCDHW -> NDHWC + + n, d, h, w, c = inputs.shape + out_d, out_h, out_w = output_size + + small_d, big_d = get_static_window_sizes(d, out_d) + gather_d = compute_static_gather_indices(d, out_d, big_d) + + small_h, big_h = get_static_window_sizes(h, out_h) + gather_h = compute_static_gather_indices(h, out_h, big_h) + + small_w, big_w = get_static_window_sizes(w, out_w) + gather_w = compute_static_gather_indices(w, out_w, big_w) + + small_pool_d = lax.reduce_window( + inputs, 0.0, lax.add, (1, small_d, 1, 1, 1), (1, 1, 1, 1, 1), "valid" + ) + small_pool_d = small_pool_d / small_d + + big_pool_d = lax.reduce_window( + inputs, 0.0, lax.add, (1, big_d, 1, 1, 1), (1, 1, 1, 1, 1), "valid" + ) + big_pool_d = big_pool_d / big_d + + combined_d = jnp.concatenate([small_pool_d, big_pool_d], axis=1) + pooled_d = 
jnp.take(combined_d, gather_d, axis=1) + + small_pool_h = lax.reduce_window( + pooled_d, 0.0, lax.add, (1, 1, small_h, 1, 1), (1, 1, 1, 1, 1), "valid" + ) + small_pool_h = small_pool_h / small_h + + big_pool_h = lax.reduce_window( + pooled_d, 0.0, lax.add, (1, 1, big_h, 1, 1), (1, 1, 1, 1, 1), "valid" + ) + big_pool_h = big_pool_h / big_h + + combined_h = jnp.concatenate([small_pool_h, big_pool_h], axis=2) + pooled_h = jnp.take(combined_h, gather_h, axis=2) + + small_pool_w = lax.reduce_window( + pooled_h, 0.0, lax.add, (1, 1, 1, small_w, 1), (1, 1, 1, 1, 1), "valid" + ) + small_pool_w = small_pool_w / small_w + + big_pool_w = lax.reduce_window( + pooled_h, 0.0, lax.add, (1, 1, 1, big_w, 1), (1, 1, 1, 1, 1), "valid" + ) + big_pool_w = big_pool_w / big_w + + combined_w = jnp.concatenate([small_pool_w, big_pool_w], axis=3) + pooled_w = jnp.take(combined_w, gather_w, axis=3) + + if data_format == "channels_first": + pooled_w = jnp.transpose(pooled_w, (0, 4, 1, 2, 3)) # NDHWC -> NCDHW + + return pooled_w + + +def adaptive_max_pool3d(inputs, output_size, data_format="channels_first"): + """Adaptive Max Pooling 3D using Two-Pool Gather method.""" + if isinstance(output_size, int): + output_size = (output_size, output_size, output_size) + + if data_format == "channels_first": + inputs = jnp.transpose(inputs, (0, 2, 3, 4, 1)) # NCDHW -> NDHWC + + n, d, h, w, c = inputs.shape + out_d, out_h, out_w = output_size + + small_d, big_d = get_static_window_sizes(d, out_d) + gather_d = compute_static_gather_indices(d, out_d, big_d) + + small_h, big_h = get_static_window_sizes(h, out_h) + gather_h = compute_static_gather_indices(h, out_h, big_h) + + small_w, big_w = get_static_window_sizes(w, out_w) + gather_w = compute_static_gather_indices(w, out_w, big_w) + + small_pool_d = lax.reduce_window( + inputs, + -jnp.inf, + lax.max, + (1, small_d, 1, 1, 1), + (1, 1, 1, 1, 1), + "valid", + ) + big_pool_d = lax.reduce_window( + inputs, -jnp.inf, lax.max, (1, big_d, 1, 1, 1), (1, 1, 1, 1, 1), "valid" + ) + + combined_d = jnp.concatenate([small_pool_d, big_pool_d], axis=1) + pooled_d = jnp.take(combined_d, gather_d, axis=1) + + small_pool_h = lax.reduce_window( + pooled_d, + -jnp.inf, + lax.max, + (1, 1, small_h, 1, 1), + (1, 1, 1, 1, 1), + "valid", + ) + big_pool_h = lax.reduce_window( + pooled_d, + -jnp.inf, + lax.max, + (1, 1, big_h, 1, 1), + (1, 1, 1, 1, 1), + "valid", + ) + + combined_h = jnp.concatenate([small_pool_h, big_pool_h], axis=2) + pooled_h = jnp.take(combined_h, gather_h, axis=2) + + small_pool_w = lax.reduce_window( + pooled_h, + -jnp.inf, + lax.max, + (1, 1, 1, small_w, 1), + (1, 1, 1, 1, 1), + "valid", + ) + big_pool_w = lax.reduce_window( + pooled_h, + -jnp.inf, + lax.max, + (1, 1, 1, big_w, 1), + (1, 1, 1, 1, 1), + "valid", + ) + + combined_w = jnp.concatenate([small_pool_w, big_pool_w], axis=3) + pooled_w = jnp.take(combined_w, gather_w, axis=3) + + if data_format == "channels_first": + pooled_w = jnp.transpose(pooled_w, (0, 4, 1, 2, 3)) # NDHWC -> NCDHW + + return pooled_w + + +# ---------- Updated Dispatcher ---------- +def adaptive_avg_pool(inputs, output_size, data_format="channels_first"): + """Dispatcher for adaptive average pooling (1D, 2D, or 3D).""" + ndims = inputs.ndim - 2 + if ndims == 1: + return adaptive_avg_pool1d(inputs, output_size, data_format) + elif ndims == 2: + return adaptive_avg_pool2d(inputs, output_size, data_format) + elif ndims == 3: + return adaptive_avg_pool3d(inputs, output_size, data_format) else: - start_h = jnp.arange(out_h, dtype=jnp.int32) * h // out_h - 
end_h = jnp.minimum( - ((jnp.arange(out_h, dtype=jnp.int32) + 1) * h + out_h - 1) // out_h, - h, + raise ValueError( + "adaptive_avg_pool supports 1D, 2D, or 3D inputs only." ) - start_w = jnp.arange(out_w, dtype=jnp.int32) * w // out_w - end_w = jnp.minimum( - ((jnp.arange(out_w, dtype=jnp.int32) + 1) * w + out_w - 1) // out_w, - w, + + +def adaptive_max_pool(inputs, output_size, data_format="channels_first"): + """Dispatcher for adaptive max pooling (1D, 2D, or 3D).""" + ndims = inputs.ndim - 2 + if ndims == 1: + return adaptive_max_pool1d(inputs, output_size, data_format) + elif ndims == 2: + return adaptive_max_pool2d(inputs, output_size, data_format) + elif ndims == 3: + return adaptive_max_pool3d(inputs, output_size, data_format) + else: + raise ValueError( + "adaptive_max_pool supports 1D, 2D, or 3D inputs only." ) - pooled = jnp.zeros((n, out_h, out_w, c), dtype=inputs.dtype) - for i in range(out_h): - sh = start_h[i] - eh = end_h[i] - for j in range(out_w): - sw = start_w[j] - ew = end_w[j] - region = inputs[:, sh:eh, sw:ew, :] - pooled = pooled.at[:, i, j, :].set(jnp.max(region, axis=(1, 2))) - if data_format == "channels_first": - pooled = jnp.transpose(pooled, (0, 3, 1, 2)) # NHWC -> NCHW - return pooled diff --git a/keras/src/backend/numpy/nn.py b/keras/src/backend/numpy/nn.py index d9034aa5da28..44f3fb882e12 100644 --- a/keras/src/backend/numpy/nn.py +++ b/keras/src/backend/numpy/nn.py @@ -1237,63 +1237,3 @@ def _pair(x): # ---- reshape -> (N, C*kH*kW, L) ---- return patches.reshape(N, C * k[0] * k[1], -1) - - -def _adaptive_pool2d(inputs, output_size, mode="avg", data_format=None): - """Adaptive pooling for 2D inputs.""" - data_format = backend.standardize_data_format(data_format) - x = convert_to_tensor(inputs) - - if isinstance(output_size, int): - out_h = out_w = output_size - else: - out_h, out_w = output_size - - if data_format == "channels_last": - N, H, W, C = x.shape - x_nchw = np.transpose(x, (0, 3, 1, 2)) - else: - N, C, H, W = x.shape - x_nchw = x - - # Precompute start and end indices using integer arithmetic - h_starts = np.array([i * H // out_h for i in range(out_h)], dtype=int) - h_ends = np.array( - [min(((i + 1) * H + out_h - 1) // out_h, H) for i in range(out_h)], - dtype=int, - ) - w_starts = np.array([j * W // out_w for j in range(out_w)], dtype=int) - w_ends = np.array( - [min(((j + 1) * W + out_w - 1) // out_w, W) for j in range(out_w)], - dtype=int, - ) - - out = np.empty((N, C, out_h, out_w), dtype=x.dtype) - - for i in range(out_h): - for j in range(out_w): - patch = x_nchw[ - :, :, h_starts[i] : h_ends[i], w_starts[j] : w_ends[j] - ] - if mode == "avg": - out[:, :, i, j] = np.mean(patch, axis=(2, 3)) - else: - out[:, :, i, j] = np.max(patch, axis=(2, 3)) - - if data_format == "channels_last": - return np.transpose(out, (0, 2, 3, 1)) - return out - - -def adaptive_avg_pool(inputs, output_size, data_format=None): - """Adaptive average pooling 2D wrapper.""" - return _adaptive_pool2d( - inputs, output_size, mode="avg", data_format=data_format - ) - - -def adaptive_max_pool(inputs, output_size, data_format=None): - """Adaptive max pooling 2D wrapper.""" - return _adaptive_pool2d( - inputs, output_size, mode="max", data_format=data_format - ) diff --git a/keras/src/backend/tensorflow/nn.py b/keras/src/backend/tensorflow/nn.py index cc86cd23c358..9310719af152 100644 --- a/keras/src/backend/tensorflow/nn.py +++ b/keras/src/backend/tensorflow/nn.py @@ -240,12 +240,280 @@ def max_pool( return outputs -def adaptive_max_pool(inputs, output_size, 
data_format=None): - raise NotImplementedError( - "Adaptive pooling not implemented for TensorFlow. " - "Use JAX or Torch backend." +def get_static_window_sizes(input_dim, output_dim): + """Calculate small and big window sizes for adaptive pooling.""" + if input_dim < output_dim: + small_window = 1 + else: + small_window = max(1, math.ceil(input_dim / output_dim)) + + big_window = small_window + 1 + + # Ensure windows don't exceed input dimension + small_window = min(small_window, input_dim) + big_window = min(big_window, input_dim) + + return small_window, big_window + + +def compute_static_gather_indices( + input_dim, output_size, small_window, big_window +): + """Compute gather indices for Two-Pool Gather method (corrected).""" + window_starts = tf.cast( + tf.floor( + tf.cast(tf.range(output_size), tf.float32) + * tf.cast(input_dim, tf.float32) + / tf.cast(output_size, tf.float32) + ), + tf.int32, + ) + window_ends = tf.cast( + tf.math.ceil( + tf.cast(tf.range(1, output_size + 1), tf.float32) + * tf.cast(input_dim, tf.float32) + / tf.cast(output_size, tf.float32) + ), + tf.int32, + ) + + window_ends = tf.minimum(window_ends, input_dim) + window_starts = tf.minimum(window_starts, input_dim - 1) + + window_sizes = window_ends - window_starts + is_big_window = tf.equal(window_sizes, big_window) + + small_pool_len = max(1, input_dim - small_window + 1) + + small_indices = window_starts + big_indices = window_starts + small_pool_len + + gather_indices = tf.where(is_big_window, big_indices, small_indices) + return tf.cast(gather_indices, tf.int32) + + +def adaptive_max_pool1d(inputs, output_size, data_format="channels_first"): + if isinstance(output_size, int): + output_size = (output_size,) + if data_format == "channels_first": + inputs = tf.transpose(inputs, (0, 2, 1)) + + static_shape = inputs.shape.as_list() + l_static = static_shape[1] + out_l = output_size[0] + + if l_static is None: + raise ValueError( + "Input length must be statically known for adaptive pooling" + ) + + small_l, big_l = get_static_window_sizes(l_static, out_l) + gather_l = compute_static_gather_indices(l_static, out_l, small_l, big_l) + + small_pool_l = tf.nn.pool( + inputs, + window_shape=(small_l,), + pooling_type="MAX", + strides=(1,), + padding="VALID", + data_format="NWC", + ) + big_pool_l = tf.nn.pool( + inputs, + window_shape=(big_l,), + pooling_type="MAX", + strides=(1,), + padding="VALID", + data_format="NWC", ) + combined_l = tf.concat([small_pool_l, big_pool_l], axis=1) + pooled_l = tf.gather(combined_l, gather_l, axis=1) + + if data_format == "channels_first": + pooled_l = tf.transpose(pooled_l, (0, 2, 1)) + return pooled_l + + +def adaptive_max_pool2d(inputs, output_size, data_format="channels_first"): + """Adaptive Max Pooling 2D using Two-Pool Gather method.""" + if isinstance(output_size, int): + output_size = (output_size, output_size) + + if data_format == "channels_first": + inputs = tf.transpose(inputs, (0, 2, 3, 1)) + + static_shape = inputs.shape.as_list() + h_static = static_shape[1] + w_static = static_shape[2] + out_h, out_w = output_size + + if h_static is None or w_static is None: + raise ValueError( + "Input spatial dimensions must be " + "statically known for adaptive pooling" + ) + + small_h, big_h = get_static_window_sizes(h_static, out_h) + small_w, big_w = get_static_window_sizes(w_static, out_w) + + gather_h = compute_static_gather_indices(h_static, out_h, small_h, big_h) + gather_w = compute_static_gather_indices(w_static, out_w, small_w, big_w) + + small_pool_h = tf.nn.pool( + 
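+        # Stride-1 "VALID" pooling materializes every window start position;
+        # the precomputed gather indices then select the adaptive windows.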
inputs, + window_shape=(small_h, 1), + pooling_type="MAX", + strides=(1, 1), + padding="VALID", + data_format="NHWC", + ) + big_pool_h = tf.nn.pool( + inputs, + window_shape=(big_h, 1), + pooling_type="MAX", + strides=(1, 1), + padding="VALID", + data_format="NHWC", + ) + + combined_h = tf.concat([small_pool_h, big_pool_h], axis=1) + pooled_h = tf.gather(combined_h, gather_h, axis=1) + + small_pool_w = tf.nn.pool( + pooled_h, + window_shape=(1, small_w), + pooling_type="MAX", + strides=(1, 1), + padding="VALID", + data_format="NHWC", + ) + big_pool_w = tf.nn.pool( + pooled_h, + window_shape=(1, big_w), + pooling_type="MAX", + strides=(1, 1), + padding="VALID", + data_format="NHWC", + ) + + combined_w = tf.concat([small_pool_w, big_pool_w], axis=2) + pooled_w = tf.gather(combined_w, gather_w, axis=2) + + if data_format == "channels_first": + pooled_w = tf.transpose(pooled_w, (0, 3, 1, 2)) + + return pooled_w + + +def adaptive_max_pool3d(inputs, output_size, data_format="channels_first"): + """Adaptive Max Pooling 3D using Two-Pool Gather method.""" + if isinstance(output_size, int): + output_size = (output_size, output_size, output_size) + + if data_format == "channels_first": + inputs = tf.transpose(inputs, (0, 2, 3, 4, 1)) + + static_shape = inputs.shape.as_list() + d_static = static_shape[1] + h_static = static_shape[2] + w_static = static_shape[3] + out_d, out_h, out_w = output_size + + if d_static is None or h_static is None or w_static is None: + raise ValueError( + "Input spatial dimensions must be " + "statically known for adaptive pooling" + ) + + small_d, big_d = get_static_window_sizes(d_static, out_d) + small_h, big_h = get_static_window_sizes(h_static, out_h) + small_w, big_w = get_static_window_sizes(w_static, out_w) + + gather_d = compute_static_gather_indices(d_static, out_d, small_d, big_d) + gather_h = compute_static_gather_indices(h_static, out_h, small_h, big_h) + gather_w = compute_static_gather_indices(w_static, out_w, small_w, big_w) + + small_pool_d = tf.nn.pool( + inputs, + window_shape=(small_d, 1, 1), + pooling_type="MAX", + strides=(1, 1, 1), + padding="VALID", + data_format="NDHWC", + ) + big_pool_d = tf.nn.pool( + inputs, + window_shape=(big_d, 1, 1), + pooling_type="MAX", + strides=(1, 1, 1), + padding="VALID", + data_format="NDHWC", + ) + + combined_d = tf.concat([small_pool_d, big_pool_d], axis=1) + pooled_d = tf.gather(combined_d, gather_d, axis=1) + + small_pool_h = tf.nn.pool( + pooled_d, + window_shape=(1, small_h, 1), + pooling_type="MAX", + strides=(1, 1, 1), + padding="VALID", + data_format="NDHWC", + ) + big_pool_h = tf.nn.pool( + pooled_d, + window_shape=(1, big_h, 1), + pooling_type="MAX", + strides=(1, 1, 1), + padding="VALID", + data_format="NDHWC", + ) + + combined_h = tf.concat([small_pool_h, big_pool_h], axis=2) + pooled_h = tf.gather(combined_h, gather_h, axis=2) + + small_pool_w = tf.nn.pool( + pooled_h, + window_shape=(1, 1, small_w), + pooling_type="MAX", + strides=(1, 1, 1), + padding="VALID", + data_format="NDHWC", + ) + big_pool_w = tf.nn.pool( + pooled_h, + window_shape=(1, 1, big_w), + pooling_type="MAX", + strides=(1, 1, 1), + padding="VALID", + data_format="NDHWC", + ) + + combined_w = tf.concat([small_pool_w, big_pool_w], axis=3) + pooled_w = tf.gather(combined_w, gather_w, axis=3) + + if data_format == "channels_first": + pooled_w = tf.transpose(pooled_w, (0, 4, 1, 2, 3)) + + return pooled_w + + +def adaptive_max_pool(inputs, output_size, data_format="channels_first"): + """Dispatcher for adaptive max pooling (1D, 2D, or 3D).""" + 
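+    # Spatial rank = tensor rank minus the batch and channel dimensions.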
ndims = len(inputs.shape) - 2 + if ndims == 1: + return adaptive_max_pool1d(inputs, output_size, data_format) + elif ndims == 2: + return adaptive_max_pool2d(inputs, output_size, data_format) + elif ndims == 3: + return adaptive_max_pool3d(inputs, output_size, data_format) + else: + raise ValueError( + "adaptive_max_pool supports 1D, 2D, or 3D inputs only." + ) + def average_pool( inputs, @@ -275,11 +543,224 @@ def average_pool( return outputs -def adaptive_avg_pool(inputs, output_size, data_format=None): - raise NotImplementedError( - "Adaptive pooling not implemented for TensorFlow. " - "Use JAX or Torch backend." +def adaptive_avg_pool1d(inputs, output_size, data_format="channels_first"): + if isinstance(output_size, int): + output_size = (output_size,) + if data_format == "channels_first": + inputs = tf.transpose(inputs, (0, 2, 1)) + + static_shape = inputs.shape.as_list() + l_static = static_shape[1] + out_l = output_size[0] + + if l_static is None: + raise ValueError( + "Input length must be statically known for adaptive pooling" + ) + + small_l, big_l = get_static_window_sizes(l_static, out_l) + gather_l = compute_static_gather_indices(l_static, out_l, small_l, big_l) + + small_pool_l = tf.nn.pool( + inputs, + window_shape=(small_l,), + pooling_type="AVG", + strides=(1,), + padding="VALID", + data_format="NWC", + ) + big_pool_l = tf.nn.pool( + inputs, + window_shape=(big_l,), + pooling_type="AVG", + strides=(1,), + padding="VALID", + data_format="NWC", + ) + + combined_l = tf.concat([small_pool_l, big_pool_l], axis=1) + pooled_l = tf.gather(combined_l, gather_l, axis=1) + + if data_format == "channels_first": + pooled_l = tf.transpose(pooled_l, (0, 2, 1)) + return pooled_l + + +def adaptive_avg_pool2d(inputs, output_size, data_format="channels_first"): + if isinstance(output_size, int): + output_size = (output_size, output_size) + + if data_format == "channels_first": + inputs = tf.transpose(inputs, (0, 2, 3, 1)) + + static_shape = inputs.shape.as_list() + h_static = static_shape[1] + w_static = static_shape[2] + out_h, out_w = output_size + + if h_static is None or w_static is None: + raise ValueError( + "Input spatial dimensions must be " + "statically known for adaptive pooling" + ) + + small_h, big_h = get_static_window_sizes(h_static, out_h) + small_w, big_w = get_static_window_sizes(w_static, out_w) + + gather_h = compute_static_gather_indices(h_static, out_h, small_h, big_h) + gather_w = compute_static_gather_indices(w_static, out_w, small_w, big_w) + + small_pool_h = tf.nn.pool( + inputs, + window_shape=(small_h, 1), + pooling_type="AVG", + strides=(1, 1), + padding="VALID", + data_format="NHWC", + ) + big_pool_h = tf.nn.pool( + inputs, + window_shape=(big_h, 1), + pooling_type="AVG", + strides=(1, 1), + padding="VALID", + data_format="NHWC", + ) + + combined_h = tf.concat([small_pool_h, big_pool_h], axis=1) + pooled_h = tf.gather(combined_h, gather_h, axis=1) + + small_pool_w = tf.nn.pool( + pooled_h, + window_shape=(1, small_w), + pooling_type="AVG", + strides=(1, 1), + padding="VALID", + data_format="NHWC", ) + big_pool_w = tf.nn.pool( + pooled_h, + window_shape=(1, big_w), + pooling_type="AVG", + strides=(1, 1), + padding="VALID", + data_format="NHWC", + ) + + combined_w = tf.concat([small_pool_w, big_pool_w], axis=2) + pooled_w = tf.gather(combined_w, gather_w, axis=2) + + if data_format == "channels_first": + pooled_w = tf.transpose(pooled_w, (0, 3, 1, 2)) + + return pooled_w + + +def adaptive_avg_pool3d(inputs, output_size, data_format="channels_first"): + if 
isinstance(output_size, int): + output_size = (output_size, output_size, output_size) + + if data_format == "channels_first": + inputs = tf.transpose(inputs, (0, 2, 3, 4, 1)) + + static_shape = inputs.shape.as_list() + d_static = static_shape[1] + h_static = static_shape[2] + w_static = static_shape[3] + out_d, out_h, out_w = output_size + + if d_static is None or h_static is None or w_static is None: + raise ValueError( + "Input spatial dimensions must be " + "statically known for adaptive pooling" + ) + + small_d, big_d = get_static_window_sizes(d_static, out_d) + small_h, big_h = get_static_window_sizes(h_static, out_h) + small_w, big_w = get_static_window_sizes(w_static, out_w) + + gather_d = compute_static_gather_indices(d_static, out_d, small_d, big_d) + gather_h = compute_static_gather_indices(h_static, out_h, small_h, big_h) + gather_w = compute_static_gather_indices(w_static, out_w, small_w, big_w) + + small_pool_d = tf.nn.pool( + inputs, + window_shape=(small_d, 1, 1), + pooling_type="AVG", + strides=(1, 1, 1), + padding="VALID", + data_format="NDHWC", + ) + big_pool_d = tf.nn.pool( + inputs, + window_shape=(big_d, 1, 1), + pooling_type="AVG", + strides=(1, 1, 1), + padding="VALID", + data_format="NDHWC", + ) + + combined_d = tf.concat([small_pool_d, big_pool_d], axis=1) + pooled_d = tf.gather(combined_d, gather_d, axis=1) + + small_pool_h = tf.nn.pool( + pooled_d, + window_shape=(1, small_h, 1), + pooling_type="AVG", + strides=(1, 1, 1), + padding="VALID", + data_format="NDHWC", + ) + big_pool_h = tf.nn.pool( + pooled_d, + window_shape=(1, big_h, 1), + pooling_type="AVG", + strides=(1, 1, 1), + padding="VALID", + data_format="NDHWC", + ) + + combined_h = tf.concat([small_pool_h, big_pool_h], axis=2) + pooled_h = tf.gather(combined_h, gather_h, axis=2) + + small_pool_w = tf.nn.pool( + pooled_h, + window_shape=(1, 1, small_w), + pooling_type="AVG", + strides=(1, 1, 1), + padding="VALID", + data_format="NDHWC", + ) + big_pool_w = tf.nn.pool( + pooled_h, + window_shape=(1, 1, big_w), + pooling_type="AVG", + strides=(1, 1, 1), + padding="VALID", + data_format="NDHWC", + ) + + combined_w = tf.concat([small_pool_w, big_pool_w], axis=3) + pooled_w = tf.gather(combined_w, gather_w, axis=3) + + if data_format == "channels_first": + pooled_w = tf.transpose(pooled_w, (0, 4, 1, 2, 3)) + + return pooled_w + + +def adaptive_avg_pool(inputs, output_size, data_format="channels_first"): + ndims = len(inputs.shape) - 2 + if ndims == 1: + return adaptive_avg_pool1d(inputs, output_size, data_format) + elif ndims == 2: + return adaptive_avg_pool2d(inputs, output_size, data_format) + elif ndims == 3: + return adaptive_avg_pool3d(inputs, output_size, data_format) + else: + raise ValueError( + "adaptive_avg_pool supports 1D, 2D, or 3D inputs only." 
+ ) def _convert_data_format(data_format, ndim): diff --git a/keras/src/layers/__init__.py b/keras/src/layers/__init__.py index cf5a0595ca10..e2d1ec0a6479 100644 --- a/keras/src/layers/__init__.py +++ b/keras/src/layers/__init__.py @@ -63,10 +63,18 @@ SpectralNormalization, ) from keras.src.layers.normalization.unit_normalization import UnitNormalization +from keras.src.layers.pooling.adaptive_average_pooling1d import ( + AdaptiveAveragePooling1D, +) from keras.src.layers.pooling.adaptive_average_pooling2d import ( AdaptiveAveragePooling2D, ) +from keras.src.layers.pooling.adaptive_average_pooling3d import ( + AdaptiveAveragePooling3D, +) +from keras.src.layers.pooling.adaptive_max_pooling1d import AdaptiveMaxPooling1D from keras.src.layers.pooling.adaptive_max_pooling2d import AdaptiveMaxPooling2D +from keras.src.layers.pooling.adaptive_max_pooling3d import AdaptiveMaxPooling3D from keras.src.layers.pooling.average_pooling1d import AveragePooling1D from keras.src.layers.pooling.average_pooling2d import AveragePooling2D from keras.src.layers.pooling.average_pooling3d import AveragePooling3D diff --git a/keras/src/layers/pooling/__init__.py b/keras/src/layers/pooling/__init__.py index edea894680d8..ed06581b27d6 100644 --- a/keras/src/layers/pooling/__init__.py +++ b/keras/src/layers/pooling/__init__.py @@ -1,4 +1,12 @@ +from keras.src.layers.pooling.adaptive_average_pooling1d import ( + AdaptiveAveragePooling1D, +) from keras.src.layers.pooling.adaptive_average_pooling2d import ( AdaptiveAveragePooling2D, ) +from keras.src.layers.pooling.adaptive_average_pooling3d import ( + AdaptiveAveragePooling3D, +) +from keras.src.layers.pooling.adaptive_max_pooling1d import AdaptiveMaxPooling1D from keras.src.layers.pooling.adaptive_max_pooling2d import AdaptiveMaxPooling2D +from keras.src.layers.pooling.adaptive_max_pooling3d import AdaptiveMaxPooling3D diff --git a/keras/src/layers/pooling/adaptive_average_pooling1d.py b/keras/src/layers/pooling/adaptive_average_pooling1d.py new file mode 100644 index 000000000000..a6d6deeb41a0 --- /dev/null +++ b/keras/src/layers/pooling/adaptive_average_pooling1d.py @@ -0,0 +1,84 @@ +"""Adaptive Average Pooling 1D layer.""" + +from keras import config +from keras.src import ops +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.AdaptiveAveragePooling1D") +class AdaptiveAveragePooling1D(Layer): + """Adaptive average pooling operation for 1D temporal or spatial data. + + This layer applies an adaptive average pooling operation, which pools the + input such that the output has a target length specified by `output_size`, + regardless of the input length. The kernel size and stride are automatically + computed to achieve the target output size. + + Args: + output_size: Integer specifying the target output length. + data_format: string, either `"channels_last"` or `"channels_first"`. + `"channels_last"` corresponds to inputs with shape + `(batch, length, channels)`. + `"channels_first"` corresponds to inputs with shape + `(batch, channels, length)`. + Defaults to the value found in your Keras config file at + `~/.keras/keras.json`. If never set, `"channels_last"` is used. 
+ + Input shape: + - If `data_format="channels_last"`: 3D tensor + `(batch_size, length, channels)` + - If `data_format="channels_first"`: 3D tensor + `(batch_size, channels, length)` + + Output shape: + - If `data_format="channels_last"`: + `(batch_size, output_length, channels)` + - If `data_format="channels_first"`: + `(batch_size, channels, output_length)` + + Examples: + + >>> import numpy as np + >>> input_seq = np.random.rand(1, 64, 3) + >>> layer = AdaptiveAveragePooling1D(output_size=32) + >>> output_seq = layer(input_seq) + >>> output_seq.shape + (1, 32, 3) + """ + + def __init__(self, output_size, data_format=None, **kwargs): + super().__init__(**kwargs) + if not isinstance(output_size, int): + raise TypeError( + f"`output_size` must be an integer. " + f"Received: {output_size} of type {type(output_size)}" + ) + + self.output_size = output_size + self.data_format = data_format or config.image_data_format() + + if self.data_format not in {"channels_first", "channels_last"}: + raise ValueError( + f"Invalid data_format: {self.data_format}. " + "Must be either 'channels_first' or 'channels_last'." + ) + + def call(self, inputs): + return ops.adaptive_avg_pool( + inputs, output_size=self.output_size, data_format=self.data_format + ) + + def compute_output_shape(self, input_shape): + if self.data_format == "channels_last": + return (input_shape[0], self.output_size, input_shape[2]) + else: # channels_first + return (input_shape[0], input_shape[1], self.output_size) + + def get_config(self): + config_dict = { + "output_size": self.output_size, + "data_format": self.data_format, + } + base_config = super().get_config() + return {**base_config, **config_dict} diff --git a/keras/src/layers/pooling/adaptive_average_pooling3d.py b/keras/src/layers/pooling/adaptive_average_pooling3d.py new file mode 100644 index 000000000000..b2f582301859 --- /dev/null +++ b/keras/src/layers/pooling/adaptive_average_pooling3d.py @@ -0,0 +1,118 @@ +"""Adaptive Average Pooling 3D layer.""" + +from keras import config +from keras.src import ops +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.AdaptiveAveragePooling3D") +class AdaptiveAveragePooling3D(Layer): + """Adaptive average pooling operation for 3D spatial data. + + This layer applies an adaptive average pooling operation, which pools the + input such that the output has a target shape specified by `output_size`, + regardless of the input shape. The kernel size and stride are automatically + computed to achieve the target output size. + + Args: + output_size: Integer or tuple of 3 integers, specifying the target + output size `(depth, height, width)`. + If a single integer is provided, the same value is used for all + three dimensions. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. + `"channels_last"` corresponds to inputs with shape + `(batch, depth, height, width, channels)` while + `"channels_first"` corresponds to inputs with shape + `(batch, channels, depth, height, width)`. + Defaults to the value found in your Keras config file at + `~/.keras/keras.json`. If never set, then "channels_last" is used. + + Input shape: + - If `data_format="channels_last"`: + 5D tensor with shape `(batch_size, depth, height, width, channels)`. + - If `data_format="channels_first"`: + 5D tensor with shape `(batch_size, channels, depth, height, width)`. 
+ + Output shape: + - If `data_format="channels_last"`: + 5D tensor with shape + `(batch_size, output_depth, output_height, output_width, channels)`. + - If `data_format="channels_first"`: + 5D tensor with shape + `(batch_size, channels, output_depth, output_height, output_width)`. + + Examples: + + >>> input_vol = np.random.rand(1, 16, 64, 64, 3) + >>> layer = keras.layers.AdaptiveAveragePooling3D(output_size=(8, 32, 32)) + >>> output_vol = layer(input_vol) + >>> output_vol.shape + (1, 8, 32, 32, 3) + + >>> # Single integer for cubic output + >>> layer = keras.layers.AdaptiveAveragePooling3D(output_size=4) + >>> output_vol = layer(input_vol) + >>> output_vol.shape + (1, 4, 4, 4, 3) + """ + + def __init__(self, output_size, data_format=None, **kwargs): + super().__init__(**kwargs) + + if isinstance(output_size, int): + self.output_size = (output_size, output_size, output_size) + elif isinstance(output_size, (list, tuple)): + if len(output_size) != 3: + raise ValueError( + "`output_size` must be an integer or tuple of 3 integers. " + f"Received output_size={output_size}" + ) + self.output_size = tuple(output_size) + else: + raise TypeError( + "`output_size` must be an integer or tuple of 3 integers. " + "Received output_size={} of type {}".format( + output_size, type(output_size) + ) + ) + + self.data_format = data_format or config.image_data_format() + + if self.data_format not in {"channels_first", "channels_last"}: + raise ValueError( + f"Invalid data_format: {self.data_format}. " + "Must be either 'channels_first' or 'channels_last'." + ) + + def call(self, inputs): + return ops.adaptive_avg_pool( + inputs, output_size=self.output_size, data_format=self.data_format + ) + + def compute_output_shape(self, input_shape): + if self.data_format == "channels_last": + return ( + input_shape[0], + self.output_size[0], + self.output_size[1], + self.output_size[2], + input_shape[4], + ) + else: # channels_first + return ( + input_shape[0], + input_shape[1], + self.output_size[0], + self.output_size[1], + self.output_size[2], + ) + + def get_config(self): + config_dict = { + "output_size": self.output_size, + "data_format": self.data_format, + } + base_config = super().get_config() + return {**base_config, **config_dict} diff --git a/keras/src/layers/pooling/adaptive_max_pooling1d.py b/keras/src/layers/pooling/adaptive_max_pooling1d.py new file mode 100644 index 000000000000..31d67ab27895 --- /dev/null +++ b/keras/src/layers/pooling/adaptive_max_pooling1d.py @@ -0,0 +1,84 @@ +"""Adaptive Max Pooling 1D layer.""" + +from keras import config +from keras.src import ops +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.AdaptiveMaxPooling1D") +class AdaptiveMaxPooling1D(Layer): + """Adaptive max pooling operation for 1D temporal or spatial data. + + This layer applies an adaptive max pooling operation, which pools the + input such that the output has a target length specified by `output_size`, + regardless of the input length. The kernel size and stride are automatically + computed to achieve the target output size. + + Args: + output_size: Integer specifying the target output length. + data_format: string, either `"channels_last"` or `"channels_first"`. + `"channels_last"` corresponds to inputs with shape + `(batch, length, channels)`. + `"channels_first"` corresponds to inputs with shape + `(batch, channels, length)`. + Defaults to the value found in your Keras config file at + `~/.keras/keras.json`. 
If never set, `"channels_last"` is used. + + Input shape: + - If `data_format="channels_last"`: + 3D tensor `(batch_size, length, channels)`. + - If `data_format="channels_first"`: + 3D tensor `(batch_size, channels, length)`. + + Output shape: + - If `data_format="channels_last"`: + 3D tensor `(batch_size, output_length, channels)`. + - If `data_format="channels_first"`: + 3D tensor `(batch_size, channels, output_length)`. + + Examples: + + >>> import numpy as np + >>> input_seq = np.random.rand(1, 64, 3) + >>> layer = AdaptiveMaxPooling1D(output_size=32) + >>> output_seq = layer(input_seq) + >>> output_seq.shape + (1, 32, 3) + """ + + def __init__(self, output_size, data_format=None, **kwargs): + super().__init__(**kwargs) + + if not isinstance(output_size, int): + raise TypeError( + "`output_size` must be an integer. Received output_size={} " + "of type {}".format(output_size, type(output_size)) + ) + self.output_size = output_size + self.data_format = data_format or config.image_data_format() + + if self.data_format not in {"channels_first", "channels_last"}: + raise ValueError( + "Invalid data_format: {}. Must be either 'channels_first' " + "or 'channels_last'.".format(self.data_format) + ) + + def call(self, inputs): + return ops.adaptive_max_pool( + inputs, output_size=self.output_size, data_format=self.data_format + ) + + def compute_output_shape(self, input_shape): + if self.data_format == "channels_last": + return (input_shape[0], self.output_size, input_shape[2]) + else: # channels_first + return (input_shape[0], input_shape[1], self.output_size) + + def get_config(self): + config_dict = { + "output_size": self.output_size, + "data_format": self.data_format, + } + base_config = super().get_config() + return {**base_config, **config_dict} diff --git a/keras/src/layers/pooling/adaptive_max_pooling3d.py b/keras/src/layers/pooling/adaptive_max_pooling3d.py new file mode 100644 index 000000000000..a8074e5e426f --- /dev/null +++ b/keras/src/layers/pooling/adaptive_max_pooling3d.py @@ -0,0 +1,115 @@ +"""Adaptive Max Pooling 3D layer.""" + +from keras import config +from keras.src import ops +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.AdaptiveMaxPooling3D") +class AdaptiveMaxPooling3D(Layer): + """Adaptive max pooling operation for 3D spatial data. + + This layer applies an adaptive max pooling operation, which pools the + input such that the output has a target shape specified by `output_size`, + regardless of the input shape. The kernel size and stride are automatically + computed to achieve the target output size. + + Args: + output_size: Integer or tuple of 3 integers specifying the target + output size `(depth, height, width)`. If a single integer is + provided, the same value is used for all three dimensions. + data_format: string, either `"channels_last"` or `"channels_first"`. + `"channels_last"` corresponds to inputs with shape + `(batch, depth, height, width, channels)`. + `"channels_first"` corresponds to inputs with shape + `(batch, channels, depth, height, width)`. + Defaults to the value found in your Keras config file at + `~/.keras/keras.json`. If never set, `"channels_last"` is used. + + Input shape: + - If `data_format="channels_last"`: + 5D tensor with shape `(batch_size, depth, height, width, channels)`. + - If `data_format="channels_first"`: + 5D tensor with shape `(batch_size, channels, depth, height, width)`. 
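Across all of these layers the docstrings say the kernel size and stride are
"automatically computed"; concretely, the PyTorch-compatible convention they
target derives each output index's pooling window with a floor/ceil rule, so
adjacent windows overlap whenever the input size is not a multiple of the
output size. A pure-Python sketch of that rule (the helper name here is
illustrative, not part of this patch):

import math

def adaptive_window(i, in_size, out_size):
    # PyTorch-style window for output index i along one axis:
    # start = floor(i * in / out), end = ceil((i + 1) * in / out)
    start = math.floor(i * in_size / out_size)
    end = math.ceil((i + 1) * in_size / out_size)
    return start, end

# Pooling a length-8 axis down to 3 outputs gives windows
# [0, 3), [2, 6), [5, 8) -- note the overlap at indices 2 and 5.
print([adaptive_window(i, 8, 3) for i in range(3)])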
+ + Output shape: + - If `data_format="channels_last"`: + 5D tensor `(batch_size, output_depth, output_height, + output_width, channels)`. + - If `data_format="channels_first"`: + 5D tensor `(batch_size, channels, output_depth, + output_height, output_width)`. + + Examples: + + >>> import numpy as np + >>> input_vol = np.random.rand(1, 16, 64, 64, 3) + >>> layer = AdaptiveMaxPooling3D(output_size=(8, 32, 32)) + >>> output_vol = layer(input_vol) + >>> output_vol.shape + (1, 8, 32, 32, 3) + + >>> # Single integer for cubic output + >>> layer = AdaptiveMaxPooling3D(output_size=4) + >>> output_vol = layer(input_vol) + >>> output_vol.shape + (1, 4, 4, 4, 3) + """ + + def __init__(self, output_size, data_format=None, **kwargs): + super().__init__(**kwargs) + + if isinstance(output_size, int): + self.output_size = (output_size, output_size, output_size) + elif isinstance(output_size, (list, tuple)): + if len(output_size) != 3: + raise ValueError( + "`output_size` must be an integer or tuple of 3 integers. " + "Received: {}".format(output_size) + ) + self.output_size = tuple(output_size) + else: + raise TypeError( + "`output_size` must be an integer or tuple of 3 integers. " + "Received: {} of type {}".format(output_size, type(output_size)) + ) + + self.data_format = data_format or config.image_data_format() + + if self.data_format not in {"channels_first", "channels_last"}: + raise ValueError( + "Invalid data_format: {}. Must be either 'channels_first' or " + "'channels_last'.".format(self.data_format) + ) + + def call(self, inputs): + return ops.adaptive_max_pool( + inputs, output_size=self.output_size, data_format=self.data_format + ) + + def compute_output_shape(self, input_shape): + if self.data_format == "channels_last": + return ( + input_shape[0], + self.output_size[0], + self.output_size[1], + self.output_size[2], + input_shape[4], + ) + else: # channels_first + return ( + input_shape[0], + input_shape[1], + self.output_size[0], + self.output_size[1], + self.output_size[2], + ) + + def get_config(self): + config_dict = { + "output_size": self.output_size, + "data_format": self.data_format, + } + base_config = super().get_config() + return {**base_config, **config_dict} diff --git a/keras/src/layers/pooling/adaptive_pooling1d_test.py b/keras/src/layers/pooling/adaptive_pooling1d_test.py new file mode 100644 index 000000000000..7f0c60e38076 --- /dev/null +++ b/keras/src/layers/pooling/adaptive_pooling1d_test.py @@ -0,0 +1,93 @@ +"""Tests for Adaptive Average and Max Pooling 1D layer.""" + +import numpy as np +import pytest + +from keras.src import backend as K +from keras.src import layers +from keras.src import ops +from keras.src import testing + +SKIP_BACKENDS = ["openvino"] + +pytestmark = pytest.mark.skipif( + K.backend() in SKIP_BACKENDS, + reason=( + "Adaptive pooling tests not supported for backend: {}".format( + K.backend() + ) + ), +) + +try: + import torch + + TORCH_AVAILABLE = True +except ImportError: + TORCH_AVAILABLE = False + + +class AdaptivePooling1DLayerTest(testing.TestCase): + """Basic tests for AdaptiveAveragePooling1D and AdaptiveMaxPooling1D.""" + + def _run_layer_test(self, layer_class, x_np, output_size, data_format): + layer = layer_class(output_size=output_size, data_format=data_format) + y = layer(x_np) + expected_shape = layer.compute_output_shape(x_np.shape) + self.assertEqual(y.shape, expected_shape) + + def test_average_pooling_basic_shapes(self): + shape = (2, 3, 8) # N,C,L + x = np.random.randn(*shape).astype("float32") + self._run_layer_test( + 
layers.AdaptiveAveragePooling1D, + x, + output_size=4, + data_format="channels_first", + ) + + def test_max_pooling_basic_shapes(self): + shape = (2, 3, 8) + x = np.random.randn(*shape).astype("float32") + self._run_layer_test( + layers.AdaptiveMaxPooling1D, + x, + output_size=4, + data_format="channels_first", + ) + + +@pytest.mark.skipif(not TORCH_AVAILABLE, reason="PyTorch not installed") +@pytest.mark.parametrize("output_size", [1, 2, 3, 4]) +def test_adaptive_avg_pool1d_matches_torch(output_size): + x_np = np.random.randn(2, 3, 8).astype(np.float32) + x_torch = torch.tensor(x_np) + y_torch = torch.nn.functional.adaptive_avg_pool1d(x_torch, output_size) + + x_keras = ops.convert_to_tensor(x_np) + y_keras = ops.adaptive_avg_pool( + x_keras, output_size=output_size, data_format="channels_first" + ) + y_keras_np = np.asarray(y_keras) + + np.testing.assert_allclose( + y_keras_np, y_torch.numpy(), rtol=1e-5, atol=1e-5 + ) + + +@pytest.mark.skipif(not TORCH_AVAILABLE, reason="PyTorch not installed") +@pytest.mark.parametrize("output_size", [1, 2, 3, 4]) +def test_adaptive_max_pool1d_matches_torch(output_size): + x_np = np.random.randn(2, 3, 8).astype(np.float32) + x_torch = torch.tensor(x_np) + y_torch = torch.nn.functional.adaptive_max_pool1d(x_torch, output_size) + + x_keras = ops.convert_to_tensor(x_np) + y_keras = ops.adaptive_max_pool( + x_keras, output_size=output_size, data_format="channels_first" + ) + y_keras_np = np.asarray(y_keras) + + np.testing.assert_allclose( + y_keras_np, y_torch.numpy(), rtol=1e-5, atol=1e-5 + ) diff --git a/keras/src/layers/pooling/adaptive_pooling2d_test.py b/keras/src/layers/pooling/adaptive_pooling2d_test.py index f12712f8b055..d6f48a46ab86 100644 --- a/keras/src/layers/pooling/adaptive_pooling2d_test.py +++ b/keras/src/layers/pooling/adaptive_pooling2d_test.py @@ -1,4 +1,4 @@ -"""Tests for Adaptive Average and Max Pooling 2D layers.""" +"""Tests for Adaptive Average and Max Pooling 2D layer.""" import numpy as np import pytest @@ -8,14 +8,17 @@ from keras.src import ops from keras.src import testing -SKIP_BACKENDS = ["openvino", "tensorflow"] +SKIP_BACKENDS = ["openvino"] pytestmark = pytest.mark.skipif( K.backend() in SKIP_BACKENDS, - reason=f"Adaptive pooling tests not supported for backend: {K.backend()}", + reason=( + "Adaptive pooling tests not supported for backend: {}".format( + K.backend() + ) + ), ) - try: import torch @@ -24,146 +27,47 @@ TORCH_AVAILABLE = False -class AdaptiveAveragePooling2DTest(testing.TestCase): - """Test suite for AdaptiveAveragePooling2D layer.""" - - def test_adaptive_avg_pooling_2d_basic(self): - """Test basic functionality with square output, channels_last.""" - layer = layers.AdaptiveAveragePooling2D( - output_size=4, data_format="channels_last" - ) - x = np.random.randn(2, 8, 8, 3).astype("float32") # NHWC - y = layer(x) - self.assertEqual(y.shape, (2, 4, 4, 3)) - - def test_adaptive_avg_pooling_2d_rectangular(self): - """Test with rectangular output size, channels_last.""" - layer = layers.AdaptiveAveragePooling2D( - output_size=(2, 4), data_format="channels_last" - ) - x = np.random.randn(2, 8, 8, 3).astype("float32") # NHWC - y = layer(x) - self.assertEqual(y.shape, (2, 2, 4, 3)) - - def test_adaptive_avg_pooling_2d_channels_first(self): - """Test channels_first data format.""" - layer = layers.AdaptiveAveragePooling2D( - output_size=4, data_format="channels_first" - ) - x = np.random.randn(2, 3, 8, 8).astype("float32") # NCHW - y = layer(x) - self.assertEqual(y.shape, (2, 3, 4, 4)) - - def 
test_adaptive_avg_pooling_2d_output_shape(self): - """Test compute_output_shape method.""" - layer = layers.AdaptiveAveragePooling2D( - output_size=(2, 4), data_format="channels_last" - ) - x_shape = (2, 8, 8, 3) - output_shape = layer.compute_output_shape(x_shape) - self.assertEqual(output_shape, (2, 2, 4, 3)) - - def test_adaptive_avg_pooling_2d_invalid_output_size(self): - """Test error handling for invalid output_size.""" - with self.assertRaisesRegex(ValueError, "`output_size` must be"): - layers.AdaptiveAveragePooling2D(output_size=(2, 3, 4)) - - def test_adaptive_avg_pooling_2d_invalid_data_format(self): - """Test error handling for invalid data_format.""" - with self.assertRaisesRegex(ValueError, "Invalid data_format"): - layer = layers.AdaptiveAveragePooling2D( - output_size=4, data_format="invalid" - ) - x = np.random.randn(2, 8, 8, 3).astype("float32") - layer(x) - - def test_adaptive_avg_pooling_2d_get_config(self): - """Test layer serialization.""" - layer = layers.AdaptiveAveragePooling2D( - output_size=(3, 5), data_format="channels_first" - ) - config = layer.get_config() - self.assertEqual(config["output_size"], (3, 5)) - self.assertEqual(config["data_format"], "channels_first") - - # Test reconstruction from config - new_layer = layers.AdaptiveAveragePooling2D.from_config(config) - self.assertEqual(new_layer.output_size, (3, 5)) - self.assertEqual(new_layer.data_format, "channels_first") +class AdaptivePooling2DLayerTest(testing.TestCase): + """Basic tests for AdaptiveAveragePooling2D and AdaptiveMaxPooling2D.""" + def _run_layer_test(self, layer_class, x_np, output_size, data_format): + layer = layer_class(output_size=output_size, data_format=data_format) + y = layer(x_np) + expected_shape = layer.compute_output_shape(x_np.shape) + self.assertEqual(y.shape, expected_shape) -class AdaptiveMaxPooling2DTest(testing.TestCase): - """Test suite for AdaptiveMaxPooling2D layer.""" - - def test_adaptive_max_pooling_2d_basic(self): - """Test basic functionality with square output, channels_last.""" - layer = layers.AdaptiveMaxPooling2D( - output_size=4, data_format="channels_last" - ) - x = np.random.randn(2, 8, 8, 3).astype("float32") # NHWC - y = layer(x) - self.assertEqual(y.shape, (2, 4, 4, 3)) - - def test_adaptive_max_pooling_2d_rectangular(self): - """Test with rectangular output size, channels_last.""" - layer = layers.AdaptiveMaxPooling2D( - output_size=(3, 5), data_format="channels_last" + def test_average_pooling_basic_shapes(self): + shape = (2, 3, 8, 8) # N,C,H,W + x = np.random.randn(*shape).astype("float32") + self._run_layer_test( + layers.AdaptiveAveragePooling2D, + x, + output_size=4, + data_format="channels_first", ) - x = np.random.randn(2, 9, 15, 3).astype("float32") # NHWC - y = layer(x) - self.assertEqual(y.shape, (2, 3, 5, 3)) - - def test_adaptive_max_pooling_2d_channels_first(self): - """Test channels_first data format.""" - layer = layers.AdaptiveMaxPooling2D( - output_size=4, data_format="channels_first" - ) - x = np.random.randn(2, 3, 8, 8).astype("float32") # NCHW - y = layer(x) - self.assertEqual(y.shape, (2, 3, 4, 4)) - - def test_adaptive_max_pooling_2d_output_shape(self): - """Test compute_output_shape method.""" - layer = layers.AdaptiveMaxPooling2D( - output_size=(3, 5), data_format="channels_last" - ) - x_shape = (2, 9, 15, 3) - output_shape = layer.compute_output_shape(x_shape) - self.assertEqual(output_shape, (2, 3, 5, 3)) - - def test_adaptive_max_pooling_2d_get_config(self): - """Test layer serialization.""" - layer = 
layers.AdaptiveMaxPooling2D( - output_size=(3, 5), data_format="channels_first" - ) - config = layer.get_config() - self.assertEqual(config["output_size"], (3, 5)) - self.assertEqual(config["data_format"], "channels_first") - # Test reconstruction from config - new_layer = layers.AdaptiveMaxPooling2D.from_config(config) - self.assertEqual(new_layer.output_size, (3, 5)) - self.assertEqual(new_layer.data_format, "channels_first") + def test_max_pooling_basic_shapes(self): + shape = (2, 3, 8, 8) + x = np.random.randn(*shape).astype("float32") + self._run_layer_test( + layers.AdaptiveMaxPooling2D, + x, + output_size=4, + data_format="channels_first", + ) @pytest.mark.skipif(not TORCH_AVAILABLE, reason="PyTorch not installed") -@pytest.mark.parametrize( - "output_size", [(4, 4), (2, 2), (3, 5), (1, 1), (7, 9)] -) -def test_adaptive_avg_pooling2d_matches_torch(output_size): - """Test numerical accuracy against PyTorch implementation.""" - x_np = np.random.randn(2, 3, 8, 8).astype(np.float32) # NCHW - - # PyTorch +@pytest.mark.parametrize("output_size", [1, 2, 3, 4]) +def test_adaptive_avg_pool2d_matches_torch(output_size): + x_np = np.random.randn(2, 3, 8, 8).astype(np.float32) x_torch = torch.tensor(x_np) y_torch = torch.nn.functional.adaptive_avg_pool2d(x_torch, output_size) - # Keras/JAX x_keras = ops.convert_to_tensor(x_np) y_keras = ops.adaptive_avg_pool( x_keras, output_size=output_size, data_format="channels_first" ) - y_keras_np = np.asarray(y_keras) np.testing.assert_allclose( @@ -172,23 +76,16 @@ def test_adaptive_avg_pooling2d_matches_torch(output_size): @pytest.mark.skipif(not TORCH_AVAILABLE, reason="PyTorch not installed") -@pytest.mark.parametrize( - "output_size", [(4, 4), (2, 2), (3, 5), (1, 1), (7, 9)] -) -def test_adaptive_max_pooling2d_matches_torch(output_size): - """Test numerical accuracy against PyTorch implementation.""" - x_np = np.random.randn(2, 3, 8, 8).astype(np.float32) # NCHW - - # PyTorch +@pytest.mark.parametrize("output_size", [1, 2, 3, 4]) +def test_adaptive_max_pool2d_matches_torch(output_size): + x_np = np.random.randn(2, 3, 8, 8).astype(np.float32) x_torch = torch.tensor(x_np) y_torch = torch.nn.functional.adaptive_max_pool2d(x_torch, output_size) - # Keras/JAX x_keras = ops.convert_to_tensor(x_np) y_keras = ops.adaptive_max_pool( x_keras, output_size=output_size, data_format="channels_first" ) - y_keras_np = np.asarray(y_keras) np.testing.assert_allclose( diff --git a/keras/src/layers/pooling/adaptive_pooling3d_test.py b/keras/src/layers/pooling/adaptive_pooling3d_test.py new file mode 100644 index 000000000000..138b24274eee --- /dev/null +++ b/keras/src/layers/pooling/adaptive_pooling3d_test.py @@ -0,0 +1,93 @@ +"""Tests for Adaptive Average and Max Pooling 3D layer.""" + +import numpy as np +import pytest + +from keras.src import backend as K +from keras.src import layers +from keras.src import ops +from keras.src import testing + +SKIP_BACKENDS = ["openvino"] + +pytestmark = pytest.mark.skipif( + K.backend() in SKIP_BACKENDS, + reason=( + "Adaptive pooling tests not supported for backend: {}".format( + K.backend() + ) + ), +) + +try: + import torch + + TORCH_AVAILABLE = True +except ImportError: + TORCH_AVAILABLE = False + + +class AdaptivePooling3DLayerTest(testing.TestCase): + """Basic tests for AdaptiveAveragePooling3D and AdaptiveMaxPooling3D.""" + + def _run_layer_test(self, layer_class, x_np, output_size, data_format): + layer = layer_class(output_size=output_size, data_format=data_format) + y = layer(x_np) + expected_shape = 
layer.compute_output_shape(x_np.shape) + self.assertEqual(y.shape, expected_shape) + + def test_average_pooling_basic_shapes(self): + shape = (2, 3, 8, 8, 8) # N,C,D,H,W + x = np.random.randn(*shape).astype("float32") + self._run_layer_test( + layers.AdaptiveAveragePooling3D, + x, + output_size=4, + data_format="channels_first", + ) + + def test_max_pooling_basic_shapes(self): + shape = (2, 3, 8, 8, 8) + x = np.random.randn(*shape).astype("float32") + self._run_layer_test( + layers.AdaptiveMaxPooling3D, + x, + output_size=4, + data_format="channels_first", + ) + + +@pytest.mark.skipif(not TORCH_AVAILABLE, reason="PyTorch not installed") +@pytest.mark.parametrize("output_size", [1, 2, 3, 4]) +def test_adaptive_avg_pool3d_matches_torch(output_size): + x_np = np.random.randn(2, 3, 8, 8, 8).astype(np.float32) + x_torch = torch.tensor(x_np) + y_torch = torch.nn.functional.adaptive_avg_pool3d(x_torch, output_size) + + x_keras = ops.convert_to_tensor(x_np) + y_keras = ops.adaptive_avg_pool( + x_keras, output_size=output_size, data_format="channels_first" + ) + y_keras_np = np.asarray(y_keras) + + np.testing.assert_allclose( + y_keras_np, y_torch.numpy(), rtol=1e-5, atol=1e-5 + ) + + +@pytest.mark.skipif(not TORCH_AVAILABLE, reason="PyTorch not installed") +@pytest.mark.parametrize("output_size", [1, 2, 3, 4]) +def test_adaptive_max_pool3d_matches_torch(output_size): + x_np = np.random.randn(2, 3, 8, 8, 8).astype(np.float32) + x_torch = torch.tensor(x_np) + y_torch = torch.nn.functional.adaptive_max_pool3d(x_torch, output_size) + + x_keras = ops.convert_to_tensor(x_np) + y_keras = ops.adaptive_max_pool( + x_keras, output_size=output_size, data_format="channels_first" + ) + y_keras_np = np.asarray(y_keras) + + np.testing.assert_allclose( + y_keras_np, y_torch.numpy(), rtol=1e-5, atol=1e-5 + ) diff --git a/keras/src/layers/pooling/benchmark_adaptive_pooling.py b/keras/src/layers/pooling/benchmark_adaptive_pooling.py index 778c3fde5345..dbe5e67e44b6 100644 --- a/keras/src/layers/pooling/benchmark_adaptive_pooling.py +++ b/keras/src/layers/pooling/benchmark_adaptive_pooling.py @@ -1,26 +1,27 @@ -# MUST be set BEFORE any imports -# MUST be set BEFORE any imports import os +# Environment setup before imports os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" -os.environ["KERAS_BACKEND"] = "jax" # choose 'jax' or set externally +os.environ["KERAS_BACKEND"] = "tensorflow" # change to 'jax' for JAX backend os.environ["JAX_PLATFORMS"] = "cpu" # or 'gpu' if configured import time import jax.numpy as jnp import numpy as np - -# Library imports must be after env vars above +import tensorflow as tf import torch from keras.src.backend.jax.nn import adaptive_avg_pool as jax_adaptive_avg_pool +from keras.src.backend.tensorflow.nn import ( + adaptive_avg_pool as tf_adaptive_avg_pool, +) -# Test configurations +# Test configurations (batch, channels, H, W, output H, output W) test_cases = [ - (32, 3, 64, 64, 4, 4), # Small - (32, 3, 224, 224, 7, 7), # Medium (ImageNet) - (32, 3, 512, 512, 14, 14), # Large + (32, 3, 64, 64, 4, 4), + (32, 3, 224, 224, 7, 7), + (32, 3, 512, 512, 14, 14), ] print("=" * 80) @@ -29,6 +30,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu" print(f"PyTorch device: {device.upper()}") +print(f"TensorFlow device: {tf.config.list_physical_devices('GPU') or 'CPU'}") print(f"JAX platform: {os.environ.get('JAX_PLATFORMS')}") print("-" * 80) @@ -37,58 +39,67 @@ print(f"Batch: {batch_size}, Channels: {channels}") print("-" * 70) + # Prepare input numpy array x_np = np.random.randn(batch_size, 
channels, input_h, input_w).astype( np.float32 ) - output_size = (output_h, output_w) # --- PyTorch benchmark --- try: x_torch = torch.tensor(x_np, device=device) - # Warmup - for _ in range(5): + for _ in range(5): # Warmup _ = torch.nn.functional.adaptive_avg_pool2d(x_torch, output_size) if device == "cuda": torch.cuda.synchronize() - # Benchmark start = time.perf_counter() for _ in range(50): - y_torch = torch.nn.functional.adaptive_avg_pool2d( - x_torch, - output_size, - ) + _ = torch.nn.functional.adaptive_avg_pool2d(x_torch, output_size) if device == "cuda": torch.cuda.synchronize() torch_time = (time.perf_counter() - start) / 50 * 1000 - print(f" PyTorch: {torch_time:.4f} ms") + print(f" PyTorch: {torch_time:.4f} ms") except Exception as e: - print(f" PyTorch: Error - {str(e)[:60]}") + print(f" PyTorch: Error - {str(e)[:60]}") + + # --- TensorFlow benchmark --- + try: + x_tf = tf.convert_to_tensor(x_np) + for _ in range(5): + out = tf_adaptive_avg_pool( + x_tf, output_size=output_size, data_format="channels_first" + ) + _ = out.numpy() # sync + + start = time.perf_counter() + for _ in range(50): + out = tf_adaptive_avg_pool( + x_tf, output_size=output_size, data_format="channels_first" + ) + _ = out.numpy() # force sync + tf_time = (time.perf_counter() - start) / 50 * 1000 + print(f" TensorFlow: {tf_time:.4f} ms") + except Exception as e: + print(f" TensorFlow: Error - {str(e)[:60]}") # --- JAX benchmark --- try: x_jax = jnp.array(x_np) - # Warmup - for _ in range(5): + for _ in range(5): # Warmup jax_adaptive_avg_pool( - x_jax, - output_size, - data_format="channels_first", + x_jax, output_size, data_format="channels_first" ).block_until_ready() - # Benchmark start = time.perf_counter() for _ in range(50): jax_adaptive_avg_pool( - x_jax, - output_size, - data_format="channels_first", + x_jax, output_size, data_format="channels_first" ).block_until_ready() jax_time = (time.perf_counter() - start) / 50 * 1000 - print(f" JAX (Keras): {jax_time:.4f} ms") + print(f" JAX (Keras): {jax_time:.4f} ms") except Exception as e: - print(f" JAX (Keras): Error - {str(e)[:60]}") + print(f" JAX (Keras): Error - {str(e)[:60]}") print("\n" + "=" * 80) print("✅ Benchmark complete!") diff --git a/keras/src/layers/pooling/test_training_adaptive_pooling.py b/keras/src/layers/pooling/test_training_adaptive_pooling.py index 7cdf5cd1b042..b4d70fb4c2b3 100644 --- a/keras/src/layers/pooling/test_training_adaptive_pooling.py +++ b/keras/src/layers/pooling/test_training_adaptive_pooling.py @@ -37,7 +37,7 @@ def make_model(pool_type="avg"): @pytest.mark.parametrize("pool", ["avg", "max"]) def test_training_adaptive_pooling(pool): # Skip backends where training is unsupported - if K.backend() in ["numpy", "openvino", "tensorflow", "jax"]: + if K.backend() in ["numpy", "openvino"]: pytest.skip( f"fit or adaptive pooling not supported for backend: {K.backend()}" ) From 248773f33bbbf32c3718a76371fb404dd737151d Mon Sep 17 00:00:00 2001 From: Malyala Karthik Date: Sun, 9 Nov 2025 00:06:51 +0530 Subject: [PATCH 09/13] Fix adaptive pooling implementation --- keras/src/backend/numpy/nn.py | 16 ++++++++++++++++ .../layers/pooling/adaptive_pooling1d_test.py | 2 +- .../layers/pooling/adaptive_pooling2d_test.py | 2 +- .../layers/pooling/adaptive_pooling3d_test.py | 2 +- .../pooling/test_training_adaptive_pooling.py | 1 - 5 files changed, 19 insertions(+), 4 deletions(-) diff --git a/keras/src/backend/numpy/nn.py b/keras/src/backend/numpy/nn.py index 44f3fb882e12..a5f3e762da4e 100644 --- a/keras/src/backend/numpy/nn.py +++ 
b/keras/src/backend/numpy/nn.py @@ -1237,3 +1237,19 @@ def _pair(x): # ---- reshape -> (N, C*kH*kW, L) ---- return patches.reshape(N, C * k[0] * k[1], -1) + + +def adaptive_max_pool(inputs, output_size, data_format=None): + """Adaptive max pooling - Numpy backend not yet supported.""" + raise NotImplementedError( + "Adaptive pooling not implemented for Numpy. " + "Use JAX, Torch or Tensorflow backend." + ) + + +def adaptive_avg_pool(inputs, output_size, data_format=None): + """Adaptive average pooling - Numpy backend not yet supported.""" + raise NotImplementedError( + "Adaptive pooling not implemented for Numpy. " + "Use JAX, Torch or Tensorflow backend." + ) diff --git a/keras/src/layers/pooling/adaptive_pooling1d_test.py b/keras/src/layers/pooling/adaptive_pooling1d_test.py index 7f0c60e38076..61bda31cefea 100644 --- a/keras/src/layers/pooling/adaptive_pooling1d_test.py +++ b/keras/src/layers/pooling/adaptive_pooling1d_test.py @@ -8,7 +8,7 @@ from keras.src import ops from keras.src import testing -SKIP_BACKENDS = ["openvino"] +SKIP_BACKENDS = ["openvino", "numpy"] pytestmark = pytest.mark.skipif( K.backend() in SKIP_BACKENDS, diff --git a/keras/src/layers/pooling/adaptive_pooling2d_test.py b/keras/src/layers/pooling/adaptive_pooling2d_test.py index d6f48a46ab86..cd6de8eec5de 100644 --- a/keras/src/layers/pooling/adaptive_pooling2d_test.py +++ b/keras/src/layers/pooling/adaptive_pooling2d_test.py @@ -8,7 +8,7 @@ from keras.src import ops from keras.src import testing -SKIP_BACKENDS = ["openvino"] +SKIP_BACKENDS = ["openvino", "numpy"] pytestmark = pytest.mark.skipif( K.backend() in SKIP_BACKENDS, diff --git a/keras/src/layers/pooling/adaptive_pooling3d_test.py b/keras/src/layers/pooling/adaptive_pooling3d_test.py index 138b24274eee..188880964229 100644 --- a/keras/src/layers/pooling/adaptive_pooling3d_test.py +++ b/keras/src/layers/pooling/adaptive_pooling3d_test.py @@ -8,7 +8,7 @@ from keras.src import ops from keras.src import testing -SKIP_BACKENDS = ["openvino"] +SKIP_BACKENDS = ["openvino", "numpy"] pytestmark = pytest.mark.skipif( K.backend() in SKIP_BACKENDS, diff --git a/keras/src/layers/pooling/test_training_adaptive_pooling.py b/keras/src/layers/pooling/test_training_adaptive_pooling.py index b4d70fb4c2b3..13a85e2b52af 100644 --- a/keras/src/layers/pooling/test_training_adaptive_pooling.py +++ b/keras/src/layers/pooling/test_training_adaptive_pooling.py @@ -6,7 +6,6 @@ from keras.src import layers from keras.src import models -np.random.seed(42) x_train = np.random.randn(1000, 32, 32, 3).astype(np.float32) y_train = np.random.randint(0, 10, 1000) x_val = np.random.randn(200, 32, 32, 3).astype(np.float32) From 53a5dc93b2f3b4fb0e988240deaf20b52e70d331 Mon Sep 17 00:00:00 2001 From: Malyala Karthik Date: Sun, 9 Nov 2025 15:58:08 +0530 Subject: [PATCH 10/13] Fix adaptive pooling implementation --- keras/src/layers/pooling/test_training_adaptive_pooling.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/keras/src/layers/pooling/test_training_adaptive_pooling.py b/keras/src/layers/pooling/test_training_adaptive_pooling.py index 13a85e2b52af..dc93ce2faa14 100644 --- a/keras/src/layers/pooling/test_training_adaptive_pooling.py +++ b/keras/src/layers/pooling/test_training_adaptive_pooling.py @@ -6,10 +6,11 @@ from keras.src import layers from keras.src import models +np.random.seed(42) x_train = np.random.randn(1000, 32, 32, 3).astype(np.float32) -y_train = np.random.randint(0, 10, 1000) +y_train = np.random.randint(0, 10, 1000).astype(np.int32) x_val = 
np.random.randn(200, 32, 32, 3).astype(np.float32) -y_val = np.random.randint(0, 10, 200) +y_val = np.random.randint(0, 10, 200).astype(np.int32) def make_model(pool_type="avg"): @@ -36,7 +37,7 @@ def make_model(pool_type="avg"): @pytest.mark.parametrize("pool", ["avg", "max"]) def test_training_adaptive_pooling(pool): # Skip backends where training is unsupported - if K.backend() in ["numpy", "openvino"]: + if K.backend() in ["numpy", "openvino", "tensorflow"]: pytest.skip( f"fit or adaptive pooling not supported for backend: {K.backend()}" ) From 2727a24ea24b34e5db49f6d63690d64c3783d8c8 Mon Sep 17 00:00:00 2001 From: Malyala Karthik Date: Sun, 9 Nov 2025 16:16:06 +0530 Subject: [PATCH 11/13] Fix adaptive pooling implementation --- .../pooling/benchmark_adaptive_pooling.py | 106 ------------------ .../pooling/test_training_adaptive_pooling.py | 66 ----------- 2 files changed, 172 deletions(-) delete mode 100644 keras/src/layers/pooling/benchmark_adaptive_pooling.py delete mode 100644 keras/src/layers/pooling/test_training_adaptive_pooling.py diff --git a/keras/src/layers/pooling/benchmark_adaptive_pooling.py b/keras/src/layers/pooling/benchmark_adaptive_pooling.py deleted file mode 100644 index dbe5e67e44b6..000000000000 --- a/keras/src/layers/pooling/benchmark_adaptive_pooling.py +++ /dev/null @@ -1,106 +0,0 @@ -import os - -# Environment setup before imports -os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" -os.environ["KERAS_BACKEND"] = "tensorflow" # change to 'jax' for JAX backend -os.environ["JAX_PLATFORMS"] = "cpu" # or 'gpu' if configured - -import time - -import jax.numpy as jnp -import numpy as np -import tensorflow as tf -import torch - -from keras.src.backend.jax.nn import adaptive_avg_pool as jax_adaptive_avg_pool -from keras.src.backend.tensorflow.nn import ( - adaptive_avg_pool as tf_adaptive_avg_pool, -) - -# Test configurations (batch, channels, H, W, output H, output W) -test_cases = [ - (32, 3, 64, 64, 4, 4), - (32, 3, 224, 224, 7, 7), - (32, 3, 512, 512, 14, 14), -] - -print("=" * 80) -print("🔥 Adaptive Average Pooling Benchmark") -print("=" * 80) - -device = "cuda" if torch.cuda.is_available() else "cpu" -print(f"PyTorch device: {device.upper()}") -print(f"TensorFlow device: {tf.config.list_physical_devices('GPU') or 'CPU'}") -print(f"JAX platform: {os.environ.get('JAX_PLATFORMS')}") -print("-" * 80) - -for batch_size, channels, input_h, input_w, output_h, output_w in test_cases: - print(f"\nInput: {input_h}x{input_w} → Output: {output_h}x{output_w}") - print(f"Batch: {batch_size}, Channels: {channels}") - print("-" * 70) - - # Prepare input numpy array - x_np = np.random.randn(batch_size, channels, input_h, input_w).astype( - np.float32 - ) - output_size = (output_h, output_w) - - # --- PyTorch benchmark --- - try: - x_torch = torch.tensor(x_np, device=device) - for _ in range(5): # Warmup - _ = torch.nn.functional.adaptive_avg_pool2d(x_torch, output_size) - if device == "cuda": - torch.cuda.synchronize() - - start = time.perf_counter() - for _ in range(50): - _ = torch.nn.functional.adaptive_avg_pool2d(x_torch, output_size) - if device == "cuda": - torch.cuda.synchronize() - torch_time = (time.perf_counter() - start) / 50 * 1000 - print(f" PyTorch: {torch_time:.4f} ms") - except Exception as e: - print(f" PyTorch: Error - {str(e)[:60]}") - - # --- TensorFlow benchmark --- - try: - x_tf = tf.convert_to_tensor(x_np) - for _ in range(5): - out = tf_adaptive_avg_pool( - x_tf, output_size=output_size, data_format="channels_first" - ) - _ = out.numpy() # sync - - start = 
time.perf_counter() - for _ in range(50): - out = tf_adaptive_avg_pool( - x_tf, output_size=output_size, data_format="channels_first" - ) - _ = out.numpy() # force sync - tf_time = (time.perf_counter() - start) / 50 * 1000 - print(f" TensorFlow: {tf_time:.4f} ms") - except Exception as e: - print(f" TensorFlow: Error - {str(e)[:60]}") - - # --- JAX benchmark --- - try: - x_jax = jnp.array(x_np) - for _ in range(5): # Warmup - jax_adaptive_avg_pool( - x_jax, output_size, data_format="channels_first" - ).block_until_ready() - - start = time.perf_counter() - for _ in range(50): - jax_adaptive_avg_pool( - x_jax, output_size, data_format="channels_first" - ).block_until_ready() - jax_time = (time.perf_counter() - start) / 50 * 1000 - print(f" JAX (Keras): {jax_time:.4f} ms") - except Exception as e: - print(f" JAX (Keras): Error - {str(e)[:60]}") - -print("\n" + "=" * 80) -print("✅ Benchmark complete!") -print("=" * 80) diff --git a/keras/src/layers/pooling/test_training_adaptive_pooling.py b/keras/src/layers/pooling/test_training_adaptive_pooling.py deleted file mode 100644 index dc93ce2faa14..000000000000 --- a/keras/src/layers/pooling/test_training_adaptive_pooling.py +++ /dev/null @@ -1,66 +0,0 @@ -# File: keras/src/layers/pooling/test_training_adaptive_pooling.py -import numpy as np -import pytest - -from keras.src import backend as K -from keras.src import layers -from keras.src import models - -np.random.seed(42) -x_train = np.random.randn(1000, 32, 32, 3).astype(np.float32) -y_train = np.random.randint(0, 10, 1000).astype(np.int32) -x_val = np.random.randn(200, 32, 32, 3).astype(np.float32) -y_val = np.random.randint(0, 10, 200).astype(np.int32) - - -def make_model(pool_type="avg"): - pool_layer = ( - layers.AdaptiveAveragePooling2D((4, 4)) - if pool_type == "avg" - else layers.AdaptiveMaxPooling2D((4, 4)) - ) - return models.Sequential( - [ - layers.Input(shape=(32, 32, 3)), - layers.Conv2D(32, 3, activation="relu", padding="same"), - layers.BatchNormalization(), - layers.Conv2D(64, 3, activation="relu", padding="same"), - pool_layer, - layers.Flatten(), - layers.Dense(128, activation="relu"), - layers.Dropout(0.5), - layers.Dense(10, activation="softmax"), - ] - ) - - -@pytest.mark.parametrize("pool", ["avg", "max"]) -def test_training_adaptive_pooling(pool): - # Skip backends where training is unsupported - if K.backend() in ["numpy", "openvino", "tensorflow"]: - pytest.skip( - f"fit or adaptive pooling not supported for backend: {K.backend()}" - ) - - model = make_model(pool) - model.compile( - optimizer="adam", - loss="sparse_categorical_crossentropy", - metrics=["accuracy"], - ) - - history = model.fit( - x_train, - y_train, - validation_data=(x_val, y_val), - epochs=1, - batch_size=32, - verbose=0, - ) - - # Basic assertions - assert "accuracy" in history.history - preds = model.predict( - np.random.randn(1, 32, 32, 3).astype(np.float32), verbose=0 - ) - assert preds.shape == (1, 10) From 2a94421f236d77db081e028597b38e57ed9aeadd Mon Sep 17 00:00:00 2001 From: Malyala Karthik Date: Fri, 14 Nov 2025 01:08:52 +0530 Subject: [PATCH 12/13] Fix adaptive pooling implementation --- keras/src/backend/jax/nn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras/src/backend/jax/nn.py b/keras/src/backend/jax/nn.py index d21e41b86a0b..7597e4650ada 100644 --- a/keras/src/backend/jax/nn.py +++ b/keras/src/backend/jax/nn.py @@ -1800,7 +1800,7 @@ def adaptive_max_pool3d(inputs, output_size, data_format="channels_first"): return pooled_w -# ---------- Updated Dispatcher 
---------- +# ---------- Dispatcher ---------- def adaptive_avg_pool(inputs, output_size, data_format="channels_first"): """Dispatcher for adaptive average pooling (1D, 2D, or 3D).""" ndims = inputs.ndim - 2 From edcf848b4350f065a081d7780abbde9285350bdc Mon Sep 17 00:00:00 2001 From: Malyala Karthik Date: Sat, 15 Nov 2025 12:46:07 +0530 Subject: [PATCH 13/13] Fix adaptive pooling implementation --- keras/src/backend/torch/nn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/keras/src/backend/torch/nn.py b/keras/src/backend/torch/nn.py index 3e9fc05a755d..3e1e87398336 100644 --- a/keras/src/backend/torch/nn.py +++ b/keras/src/backend/torch/nn.py @@ -385,7 +385,7 @@ def max_pool( def adaptive_max_pool(inputs, output_size, data_format=None): - """Adaptive max pooling (1D/2D/3D) with channels_last support.""" + """Adaptive max pooling(1D/2D/3D) with channels_last support.""" inputs = convert_to_tensor(inputs) num_spatial_dims = inputs.ndim - 2 @@ -504,7 +504,7 @@ def average_pool( def adaptive_avg_pool(inputs, output_size, data_format=None): - """Adaptive average pooling (1D/2D/3D) with channels_last support.""" + """Adaptive average pooling(1D/2D/3D) with channels_last support.""" inputs = convert_to_tensor(inputs) num_spatial_dims = inputs.ndim - 2
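The torch-backend docstrings above advertise channels_last support, while
torch's native adaptive pooling ops only accept channels_first input, so that
support presumably amounts to a transpose on the way in and a transpose back
on the way out. A hedged sketch of the pattern for the 2D case (illustrative
only, not this patch's exact code; the wrapper name is invented):

import torch
import torch.nn.functional as F

def adaptive_avg_pool2d_nhwc(x, output_size):
    # torch wants NCHW: move channels first, pool, then restore NHWC.
    x = x.permute(0, 3, 1, 2)
    y = F.adaptive_avg_pool2d(x, output_size)
    return y.permute(0, 2, 3, 1)

x = torch.randn(2, 8, 8, 3)  # NHWC input
print(adaptive_avg_pool2d_nhwc(x, (4, 4)).shape)  # torch.Size([2, 4, 4, 3])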