[Feature] TensorDictPrimer with single default_value callable

Vincent Moens · Vincent Moens · commit 59e85458bfbb · 2025-03-03T15:48:51.000Z
ghstack-source-id: a9a677f
Pull Request resolved: #2732
diff --git a/torchrl/envs/custom/pendulum.py b/torchrl/envs/custom/pendulum.py
@@ -269,11 +269,20 @@ def _reset(self, tensordict):
         batch_size = (
             tensordict.batch_size if tensordict is not None else self.batch_size
         )
-        if tensordict is None or tensordict.is_empty():
+        if tensordict is None or "params" not in tensordict:
             # if no ``tensordict`` is passed, we generate a single set of hyperparameters
             # Otherwise, we assume that the input ``tensordict`` contains all the relevant
             # parameters to get started.
             tensordict = self.gen_params(batch_size=batch_size, device=self.device)
+        elif "th" in tensordict and "thdot" in tensordict:
+            # we can hard-reset the env too
+            return tensordict
+        out = self._reset_random_data(
+            tensordict.shape, batch_size, tensordict["params"]
+        )
+        return out
+
+    def _reset_random_data(self, shape, batch_size, params):
 
         high_th = torch.tensor(self.DEFAULT_X, device=self.device)
         high_thdot = torch.tensor(self.DEFAULT_Y, device=self.device)
@@ -284,20 +293,20 @@ def _reset(self, tensordict):
         # of simulators run simultaneously. In other contexts, the initial
         # random state's shape will depend upon the environment batch-size instead.
         th = (
-            torch.rand(tensordict.shape, generator=self.rng, device=self.device)
+            torch.rand(shape, generator=self.rng, device=self.device)
             * (high_th - low_th)
             + low_th
         )
         thdot = (
-            torch.rand(tensordict.shape, generator=self.rng, device=self.device)
+            torch.rand(shape, generator=self.rng, device=self.device)
             * (high_thdot - low_thdot)
             + low_thdot
         )
         out = TensorDict(
             {
                 "th": th,
                 "thdot": thdot,
-                "params": tensordict["params"],
+                "params": params,
             },
             batch_size=batch_size,
         )
diff --git a/torchrl/envs/transforms/rlhf.py b/torchrl/envs/transforms/rlhf.py
@@ -2,6 +2,8 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
 from copy import copy, deepcopy
 
 import torch
diff --git a/torchrl/envs/transforms/transforms.py b/torchrl/envs/transforms/transforms.py

@@ -2,6 +2,8 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
 from copy import copy, deepcopy
 
 import torch