Update

vmoens · vmoens · commit 5930cb331363 · 2025-10-22T17:54:47.000-07:00
[ghstack-poisoned]
diff --git a/.github/unittest/llm/scripts_llm/install.sh b/.github/unittest/llm/scripts_llm/install.sh
@@ -30,15 +30,15 @@ git submodule sync && git submodule update --init --recursive
 #printf "Installing PyTorch with cu128"
 #if [[ "$TORCH_VERSION" == "nightly" ]]; then
 #  if [ "${CU_VERSION:-}" == cpu ] ; then
-#      pip3 install --pre torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/nightly/cpu -U
+#      pip install --pre torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/nightly/cpu -U
 #  else
-#      pip3 install --pre torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/nightly/cu128 -U
+#      pip install --pre torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/nightly/cu128 -U
 #  fi
 #elif [[ "$TORCH_VERSION" == "stable" ]]; then
 #    if [ "${CU_VERSION:-}" == cpu ] ; then
-#      pip3 install torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/cpu
+#      pip install torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/cpu
 #  else
-#      pip3 install torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/cu128
+#      pip install torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/cu128
 #  fi
 #else
 #  printf "Failed to install pytorch"
@@ -47,9 +47,10 @@ git submodule sync && git submodule update --init --recursive
 
 # install tensordict
 if [[ "$RELEASE" == 0 ]]; then
-  pip3 install git+https://github.com/pytorch/tensordict.git
+  pip install "pybind11[global]" ninja
+  pip install git+https://github.com/pytorch/tensordict.git
 else
-  pip3 install tensordict
+  pip install tensordict
 fi
 
 # smoke test
diff --git a/test/test_specs.py b/test/test_specs.py
@@ -4585,6 +4585,26 @@ def test_names_repr(self):
         assert "Composite" in repr_str
         assert "obs" in repr_str
 
+    def test_zero_create_names(self):
+        """Check that Composite.zero() propagates the spec's dim names."""
+        spec = Composite(
+            {"obs": Bounded(low=-1, high=1, shape=(10, 3, 4))},
+            shape=(10,),
+            names=["batch"],
+        )
+        td = spec.zero()
+        assert td.names == ["batch"]
+
+    def test_rand_create_names(self):
+        """Check that Composite.rand() propagates the spec's dim names."""
+        spec = Composite(
+            {"obs": Bounded(low=-1, high=1, shape=(10, 3, 4))},
+            shape=(10,),
+            names=["batch"],
+        )
+        td = spec.rand()
+        assert td.names == ["batch"]
+
 
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
diff --git a/test/test_weightsync.py b/test/test_weightsync.py
@@ -9,11 +9,11 @@
 import pytest
 import torch
 import torch.nn as nn
+from mocking_classes import ContinuousActionVecMockEnv
 from tensordict import TensorDict
 from tensordict.nn import TensorDictModule
 from torch import multiprocessing as mp
 from torchrl.collectors import MultiSyncDataCollector, SyncDataCollector
-from torchrl.envs import GymEnv
 from torchrl.weight_update.weight_sync_schemes import (
     _resolve_model,
     MPTransport,
@@ -274,7 +274,7 @@ def test_no_weight_sync_scheme(self):
 class TestCollectorIntegration:
     @pytest.fixture
     def simple_env(self):
-        return GymEnv("CartPole-v1")
+        return ContinuousActionVecMockEnv()
 
     @pytest.fixture
     def simple_policy(self, simple_env):
@@ -291,7 +291,7 @@ def test_syncdatacollector_multiprocess_scheme(self, simple_policy):
         scheme = MultiProcessWeightSyncScheme(strategy="state_dict")
 
         collector = SyncDataCollector(
-            create_env_fn=lambda: GymEnv("CartPole-v1"),
+            create_env_fn=ContinuousActionVecMockEnv,
             policy=simple_policy,
             frames_per_batch=64,
             total_frames=128,
@@ -316,8 +316,8 @@ def test_multisyncdatacollector_multiprocess_scheme(self, simple_policy):
 
         collector = MultiSyncDataCollector(
             create_env_fn=[
-                lambda: GymEnv("CartPole-v1"),
-                lambda: GymEnv("CartPole-v1"),
+                ContinuousActionVecMockEnv,
+                ContinuousActionVecMockEnv,
             ],
             policy=simple_policy,
             frames_per_batch=64,
@@ -343,8 +343,8 @@ def test_multisyncdatacollector_shared_mem_scheme(self, simple_policy):
 
         collector = MultiSyncDataCollector(
             create_env_fn=[
-                lambda: GymEnv("CartPole-v1"),
-                lambda: GymEnv("CartPole-v1"),
+                ContinuousActionVecMockEnv,
+                ContinuousActionVecMockEnv,
             ],
             policy=simple_policy,
             frames_per_batch=64,
@@ -369,7 +369,7 @@ def test_collector_no_weight_sync(self, simple_policy):
         scheme = NoWeightSyncScheme()
 
         collector = SyncDataCollector(
-            create_env_fn=lambda: GymEnv("CartPole-v1"),
+            create_env_fn=ContinuousActionVecMockEnv,
             policy=simple_policy,
             frames_per_batch=64,
             total_frames=128,
@@ -385,7 +385,7 @@ def test_collector_no_weight_sync(self, simple_policy):
 
 class TestMultiModelUpdates:
     def test_multi_model_state_dict_updates(self):
-        env = GymEnv("CartPole-v1")
+        env = ContinuousActionVecMockEnv()
 
         policy = TensorDictModule(
             nn.Linear(
@@ -407,7 +407,7 @@ def test_multi_model_state_dict_updates(self):
         }
 
         collector = SyncDataCollector(
-            create_env_fn=lambda: GymEnv("CartPole-v1"),
+            create_env_fn=ContinuousActionVecMockEnv,
             policy=policy,
             frames_per_batch=64,
             total_frames=128,
@@ -438,7 +438,7 @@ def test_multi_model_state_dict_updates(self):
         env.close()
 
     def test_multi_model_tensordict_updates(self):
-        env = GymEnv("CartPole-v1")
+        env = ContinuousActionVecMockEnv()
 
         policy = TensorDictModule(
             nn.Linear(
@@ -460,7 +460,7 @@ def test_multi_model_tensordict_updates(self):
         }
 
         collector = SyncDataCollector(
-            create_env_fn=lambda: GymEnv("CartPole-v1"),
+            create_env_fn=ContinuousActionVecMockEnv,
             policy=policy,
             frames_per_batch=64,
             total_frames=128,
diff --git a/torchrl/data/tensor_specs.py b/torchrl/data/tensor_specs.py
@@ -5740,16 +5740,22 @@ def rand(self, shape: torch.Size = None) -> TensorDictBase:
         for key, item in self.items():
             if item is not None:
                 _dict[key] = item.rand(shape)
-        if self.data_cls is None:
-            cls = TensorDict
+
+        cls = self.data_cls if self.data_cls is not None else TensorDict
+        if cls is not TensorDict:
+            kwargs = {}
+            if self._td_dim_names is not None:
+                warnings.warn(f"names for cls {cls} is not supported for rand.")
         else:
-            cls = self.data_cls
+            kwargs = {"names": self._td_dim_names}
+
         # No need to run checks since we know Composite is compliant with
         # TensorDict requirements
         return cls.from_dict(
             _dict,
             batch_size=_size([*shape, *_remove_neg_shapes(self.shape)]),
             device=self.device,
+            **kwargs,
         )
 
     def keys(
@@ -6017,10 +6023,13 @@ def zero(self, shape: torch.Size = None) -> TensorDictBase:
         except RuntimeError:
             device = self._device
 
-        if self.data_cls is not None:
-            cls = self.data_cls
+        cls = self.data_cls if self.data_cls is not None else TensorDict
+        if cls is not TensorDict:
+            kwargs = {}
+            if self._td_dim_names is not None:
+                warnings.warn(f"names for cls {cls} is not supported for zero.")
         else:
-            cls = TensorDict
+            kwargs = {"names": self._td_dim_names}
 
         return cls.from_dict(
             {
@@ -6030,6 +6039,7 @@ def zero(self, shape: torch.Size = None) -> TensorDictBase:
             },
             batch_size=_size([*shape, *self._safe_shape]),
             device=device,
+            **kwargs,
         )
 
     def __eq__(self, other: object) -> bool:
diff --git a/torchrl/envs/libs/gym.py b/torchrl/envs/libs/gym.py
@@ -1255,7 +1255,12 @@ def _build_gym_env(self, env, pixels_only):  # noqa: F811
 
     @property
     def lib(self) -> ModuleType:
-        return gym_backend()
+        gym = gym_backend()
+        if gym is None:
+            raise RuntimeError(
+                "Gym backend is not available. Please install gym or gymnasium."
+            )
+        return gym
 
     def _set_seed(self, seed: int | None) -> None:  # noqa: F811
         if self._seed_calls_reset is None: