[BugFix] Fix PRB serialization

Vincent Moens · Vincent Moens · commit f0cda3183947 · 2025-05-20T11:01:41.000+01:00
ghstack-source-id: a40d39a Pull-Request-resolved: #2963
diff --git a/test/test_rb.py b/test/test_rb.py
@@ -2997,6 +2997,49 @@ def test_prb_update_max_priority(self, max_priority_within_buffer):
             assert rb._sampler._max_priority[0] == 21
             assert rb._sampler._max_priority[1] == 0
 
+    def test_prb_serialization(self, tmpdir):
+        """Check that a saved prioritized buffer can be loaded into another buffer."""
+        rb = ReplayBuffer(
+            storage=LazyMemmapStorage(max_size=10),
+            sampler=PrioritizedSampler(max_capacity=10, alpha=0.8, beta=0.6),
+        )
+        td = TensorDict(
+            {
+                "observations": torch.zeros(1, 3),
+                "actions": torch.zeros(1, 1),
+                "rewards": torch.zeros(1, 1),
+                "next_observations": torch.zeros(1, 3),
+                "terminations": torch.zeros(1, 1, dtype=torch.bool),
+            },
+            batch_size=[1],
+        )
+        rb.extend(td)
+        rb.save(tmpdir)
+
+        # rb2 uses different alpha/beta than rb so a successful load is observable.
+        rb2 = ReplayBuffer(
+            storage=LazyMemmapStorage(max_size=10),
+            sampler=PrioritizedSampler(max_capacity=10, alpha=0.5, beta=0.5),
+        )
+        td = TensorDict(
+            {
+                "observations": torch.ones(1, 3),
+                "actions": torch.ones(1, 1),
+                "rewards": torch.ones(1, 1),
+                "next_observations": torch.ones(1, 3),
+                "terminations": torch.ones(1, 1, dtype=torch.bool),
+            },
+            batch_size=[1],
+        )
+        rb2.extend(td)
+        rb2.load(tmpdir)
+        # The loaded buffer (not only the source one) must hold the single saved item.
+        assert len(rb) == len(rb2) == 1
+        assert rb.sampler._alpha == rb2.sampler._alpha
+        assert rb.sampler._beta == rb2.sampler._beta
+        assert rb.sampler._max_priority[0] == rb2.sampler._max_priority[0]
+        assert rb.sampler._max_priority[1] == rb2.sampler._max_priority[1]
+
     def test_prb_ndim(self):
         """This test lists all the possible ways of updating the priority of a PRB with RB, TRB and TPRB.
 
diff --git a/torchrl/data/replay_buffers/samplers.py b/torchrl/data/replay_buffers/samplers.py
@@ -18,6 +18,8 @@
 import torch
 from tensordict import MemoryMappedTensor, TensorDict
 from tensordict.utils import NestedKey
+
+from torch.utils._pytree import tree_map
 from torchrl._extension import EXTENSION_WARNING
 from torchrl._utils import _replace_last, logger
 from torchrl.data.replay_buffers.storages import Storage, StorageEnsemble, TensorStorage
@@ -676,13 +678,16 @@ def dumps(self, path):
         )
         with open(path / "sampler_metadata.json", "w") as file:
             json.dump(
-                {
-                    "_alpha": self._alpha,
-                    "_beta": self._beta,
-                    "_eps": self._eps,
-                    "_max_priority": self._max_priority,
-                    "_max_capacity": self._max_capacity,
-                },
+                tree_map(
+                    float,
+                    {
+                        "_alpha": self._alpha,
+                        "_beta": self._beta,
+                        "_eps": self._eps,
+                        "_max_priority": self._max_priority,
+                        "_max_capacity": self._max_capacity,
+                    },
+                ),
                 file,
             )
 
@@ -693,7 +698,11 @@ def loads(self, path):
         self._alpha = metadata["_alpha"]
         self._beta = metadata["_beta"]
         self._eps = metadata["_eps"]
-        self._max_priority = metadata["_max_priority"]
+        tree_map(
+            lambda dest, orig: dest.copy_(orig),
+            tuple(self._max_priority),
+            tuple(metadata["_max_priority"]),
+        )
         _max_capacity = metadata["_max_capacity"]
         if _max_capacity != self._max_capacity:
             raise RuntimeError(