Fixed review comments: add backend-specific convert_checkpoint_value and remove legacy Orbax restore path

amitsrivastava78 · amitsrivastava78 · commit 7722e301c578 · 2025-10-31T09:32:46.000+05:30
diff --git a/keras/src/backend/jax/__init__.py b/keras/src/backend/jax/__init__.py
@@ -14,6 +14,7 @@
 from keras.src.backend.jax.core import cast
 from keras.src.backend.jax.core import compute_output_spec
 from keras.src.backend.jax.core import cond
+from keras.src.backend.jax.core import convert_checkpoint_value
 from keras.src.backend.jax.core import convert_to_numpy
 from keras.src.backend.jax.core import convert_to_tensor
 from keras.src.backend.jax.core import device_scope
diff --git a/keras/src/backend/jax/core.py b/keras/src/backend/jax/core.py
@@ -572,3 +572,35 @@ def device_scope(device_name):
     else:
         jax_device = device_name
     return jax.default_device(jax_device)
+
+
+def convert_checkpoint_value(value, dtype, shape):
+    """Convert a value for checkpoint restoration, preserving JAX arrays for
+    sharding.
+
+    This function handles the special case of checkpoint restoration where JAX
+    arrays should be preserved for sharding support: they are returned
+    untouched, without casting or reshaping.
+
+    Every other value (numpy arrays and other array-likes) is converted to a
+    JAX array with the specified dtype and shape.
+
+    Args:
+        value: The value to convert (can be JAX array, numpy array, or other
+            types)
+        dtype: The target dtype (ignored when `value` is a JAX array)
+        shape: The target shape (ignored when `value` is a JAX array)
+
+    Returns:
+        The original object if `value` is a JAX array, otherwise a new JAX
+        array with the specified dtype and shape.
+    """
+    # Test for `jax.Array` explicitly. Probing `__array_namespace__` is not a
+    # JAX-specific check: NumPy >= 2.0 arrays define it as well, so they were
+    # returned unconverted, and matching on the repr of the type is brittle.
+    if isinstance(value, jax.Array):
+        # Already a JAX array: return as-is to preserve sharding.
+        return value
+    # Everything else goes through one constructor call; passing `dtype` casts
+    # in a single step rather than via an intermediate default-dtype array.
+    return jnp.array(value, dtype=dtype).reshape(shape)
diff --git a/keras/src/backend/tensorflow/__init__.py b/keras/src/backend/tensorflow/__init__.py
@@ -13,6 +13,7 @@
 from keras.src.backend.tensorflow.core import cast
 from keras.src.backend.tensorflow.core import compute_output_spec
 from keras.src.backend.tensorflow.core import cond
+from keras.src.backend.tensorflow.core import convert_checkpoint_value
 from keras.src.backend.tensorflow.core import convert_to_numpy
 from keras.src.backend.tensorflow.core import convert_to_tensor
 from keras.src.backend.tensorflow.core import device_scope
diff --git a/keras/src/backend/tensorflow/core.py b/keras/src/backend/tensorflow/core.py
@@ -696,3 +696,23 @@ def __exit__(self, *args, **kwargs):
 
 def device_scope(device_name):
     return tf.device(device_name)
+
+
+def convert_checkpoint_value(value, dtype, shape):
+    """Prepare a restored checkpoint value for the TensorFlow backend.
+
+    The value is materialized as a NumPy array, cast to `dtype` and reshaped
+    to `shape`.
+
+    Args:
+        value: The restored value; a NumPy array or anything `np.array`
+            accepts.
+        dtype: The dtype of the returned array.
+        shape: The shape of the returned array.
+
+    Returns:
+        A NumPy array with the requested dtype and shape.
+    """
+    if not isinstance(value, np.ndarray):
+        return np.array(value, dtype=dtype).reshape(shape)
+    return value.astype(dtype).reshape(shape)
diff --git a/keras/src/backend/torch/__init__.py b/keras/src/backend/torch/__init__.py
@@ -29,6 +29,7 @@
 from keras.src.backend.torch.core import cast
 from keras.src.backend.torch.core import compute_output_spec
 from keras.src.backend.torch.core import cond
+from keras.src.backend.torch.core import convert_checkpoint_value
 from keras.src.backend.torch.core import convert_to_numpy
 from keras.src.backend.torch.core import convert_to_tensor
 from keras.src.backend.torch.core import device_scope
diff --git a/keras/src/backend/torch/core.py b/keras/src/backend/torch/core.py
@@ -730,3 +730,22 @@ def backward(ctx, grad_output):
         if not isinstance(grads, tuple):
             grads = (grads,)
         return (None,) + grads
+
+
+def convert_checkpoint_value(value, dtype, shape):
+    """Prepare a restored checkpoint value for the PyTorch backend.
+
+    The value is materialized as a NumPy array with `dtype` and `shape`.
+
+    Args:
+        value: The restored value; a NumPy array or anything `np.array`
+            accepts.
+        dtype: The dtype of the returned array.
+        shape: The shape of the returned array.
+
+    Returns:
+        A NumPy array with the requested dtype and shape.
+    """
+    if not isinstance(value, np.ndarray):
+        return np.array(value, dtype=dtype).reshape(shape)
+    return value.astype(dtype).reshape(shape)
diff --git a/keras/src/callbacks/orbax_checkpoint.py b/keras/src/callbacks/orbax_checkpoint.py
@@ -4,7 +4,6 @@
 import numpy as np
 
 from keras.src import backend
-from keras.src import ops
 from keras.src import tree
 from keras.src.api_export import keras_export
 from keras.src.callbacks.monitor_callback import (
@@ -33,11 +32,6 @@ def convert_scalars(obj):
     return tree.map_structure(convert_scalars, state_tree)
 
 
-def _flatten_state_tree_values(state_tree):
-    """Flatten nested state tree into a list of values in consistent order."""
-    return tree.flatten(state_tree)
-
-
 def _reconstruct_state_tree_with_values(structure, values):
     """Reconstruct state tree structure with provided values."""
     value_iter = iter(values)
@@ -62,64 +56,14 @@ def _reconstruct_value(obj):
                 return np.array(value, dtype=obj.dtype)
         elif isinstance(obj, np.ndarray):
             # obj is a numpy array
-            if isinstance(value, np.ndarray):
-                return value.astype(obj.dtype).reshape(obj.shape)
-            else:
-                return np.array(value, dtype=obj.dtype).reshape(obj.shape)
+            # Use backend-specific conversion that handles JAX arrays properly
+            return backend.convert_checkpoint_value(value, obj.dtype, obj.shape)
         else:
             return value
 
     return tree.map_structure(_reconstruct_value, structure)
 
 
-def _restore_legacy_format(
-    checkpoint_data, target_model, save_optimizer_state, save_metrics_state
-):
-    """Restore from the old flat format for backward compatibility."""
-    # Restore model weights
-    if "model_weights" in checkpoint_data:
-        model_weights_np = checkpoint_data["model_weights"]
-        # Convert NumPy arrays back to backend tensors and assign to
-        # model
-        for i, weight_np in enumerate(model_weights_np):
-            # Convert numpy array back to appropriate backend tensor
-            weight_tensor = ops.convert_to_tensor(weight_np)
-            target_model.weights[i].assign(weight_tensor)
-
-    # Restore optimizer state if available
-    if "optimizer_state" in checkpoint_data and save_optimizer_state:
-        optimizer_vars_np = checkpoint_data["optimizer_state"]
-        # Only restore if the variable counts match
-        if len(optimizer_vars_np) == len(target_model.optimizer.variables):
-            # Convert NumPy arrays back to backend tensors and assign to
-            # optimizer
-            for i, var_np in enumerate(optimizer_vars_np):
-                var_tensor = ops.convert_to_tensor(var_np)
-                target_model.optimizer.variables[i].assign(var_tensor)
-
-    # Restore metrics state if available
-    if (
-        "metrics_state" in checkpoint_data
-        and save_metrics_state
-        and hasattr(target_model, "metrics")
-    ):
-        metrics_vars_np = checkpoint_data["metrics_state"]
-        metric_idx = 0
-        for metric in target_model.metrics:
-            if (
-                hasattr(metric, "variables")
-                and metric.variables
-                and metric_idx < len(metrics_vars_np)
-            ):
-                metric_vars_np = metrics_vars_np[metric_idx]
-                # Restore metric variables
-                for i, var_np in enumerate(metric_vars_np):
-                    if i < len(metric.variables):
-                        var_tensor = ops.convert_to_tensor(var_np)
-                        metric.variables[i].assign(var_tensor)
-                metric_idx += 1
-
-
 @keras_export("keras.callbacks.OrbaxCheckpoint")
 class OrbaxCheckpoint(MonitorCallback):
     """Callback to save and load model state using Orbax with a similar API to
@@ -574,14 +518,8 @@ def _restore_model_state(self, checkpoint_data, model=None):
                 checkpoint_data["model_state"], target_model
             )
         else:
-            # Fallback to legacy format
-            _restore_legacy_format(
-                checkpoint_data,
-                target_model,
-                self.save_optimizer_state,
-                self.save_metrics_state,
-            )
-            return True
+            # Unsupported checkpoint format
+            return False
 
     def _restore_from_nested_structures(self, checkpoint_data, target_model):
         """Restore from the new nested structures format."""