[CI] Fix old deps CI (#3165)

vmoens · web-flow · commit 66b9a217d775 · 2025-09-11T11:39:46.000+01:00
diff --git a/.github/unittest/linux_olddeps/scripts_gym_0_13/run_test.sh b/.github/unittest/linux_olddeps/scripts_gym_0_13/run_test.sh
@@ -34,6 +34,7 @@ CKPT_BACKEND=torch MUJOCO_GL=egl python .github/unittest/helpers/coverage_run_pa
     --ignore test/test_distributed.py \
     --ignore test/test_rlhf.py \
     --ignore test/llm \
+    -k "not HalfCheetah-v2" \
     --mp_fork_if_no_cuda
 
 #pytest --instafail -v --durations 200
diff --git a/.github/unittest/linux_olddeps/scripts_gym_0_13/setup_env.sh b/.github/unittest/linux_olddeps/scripts_gym_0_13/setup_env.sh
@@ -100,7 +100,6 @@ conda env config vars set \
   SDL_VIDEODRIVER=dummy \
   DISPLAY=unix:0.0 \
   PYOPENGL_PLATFORM=egl \
-  LD_PRELOAD=$glew_path \
   NVIDIA_PATH=/usr/src/nvidia-470.63.01 \
   MUJOCO_PY_MJKEY_PATH=${root_dir}/mujoco-py/mujoco_py/binaries/mjkey.txt \
   MUJOCO_PY_MUJOCO_PATH=${root_dir}/mujoco-py/mujoco_py/binaries/linux/mujoco210 \
diff --git a/.github/workflows/test-linux.yml b/.github/workflows/test-linux.yml
@@ -101,7 +101,7 @@ jobs:
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
       repository: pytorch/rl
-      docker-image: "nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04"
+      docker-image: "nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04"
       gpu-arch-type: cuda
       gpu-arch-version: ${{ matrix.cuda_arch_version }}
       timeout: 90
diff --git a/test/test_cost.py b/test/test_cost.py
@@ -4266,11 +4266,15 @@ def test_sac_deactivate_vmap(
             loss_fn_no_vmap.make_value_estimator(td_est)
 
         torch.manual_seed(0)
-        with _check_td_steady(td), pytest.warns(
-            UserWarning, match="No target network updater"
-        ):
-            loss_no_vmap = loss_fn_no_vmap(td)
-        assert_allclose_td(loss_vmap, loss_no_vmap)
+        with pytest.raises(
+            NotImplementedError,
+            match="This implementation is not supported for torch<2.7",
+        ) if torch.__version__ < "2.7" else contextlib.nullcontext():
+            with _check_td_steady(td), pytest.warns(
+                UserWarning, match="No target network updater"
+            ):
+                loss_no_vmap = loss_fn_no_vmap(td)
+            assert_allclose_td(loss_vmap, loss_no_vmap)
 
     @pytest.mark.parametrize("delay_value", (True, False))
     @pytest.mark.parametrize("delay_actor", (True, False))
@@ -5235,12 +5239,16 @@ def test_discrete_sac_deactivate_vmap(
         if td_est is not None:
             loss_fn_no_vmap.make_value_estimator(td_est)
 
-        with _check_td_steady(td), pytest.warns(
-            UserWarning, match="No target network updater"
-        ):
-            torch.manual_seed(1)
-            loss_no_vmap = loss_fn_no_vmap(td)
-        assert_allclose_td(loss_vmap, loss_no_vmap)
+        with pytest.raises(
+            NotImplementedError,
+            match="This implementation is not supported for torch<2.7",
+        ) if torch.__version__ < "2.7" else contextlib.nullcontext():
+            with _check_td_steady(td), pytest.warns(
+                UserWarning, match="No target network updater"
+            ):
+                torch.manual_seed(1)
+                loss_no_vmap = loss_fn_no_vmap(td)
+            assert_allclose_td(loss_vmap, loss_no_vmap)
 
     @pytest.mark.parametrize("delay_qvalue", (True, False))
     @pytest.mark.parametrize("num_qvalue", [2])
@@ -5979,10 +5987,14 @@ def test_crossq_deactivate_vmap(
         if td_est is not None:
             loss_fn_no_vmap.make_value_estimator(td_est)
 
-        with _check_td_steady(td):
-            torch.manual_seed(1)
-            loss_no_vmap = loss_fn_no_vmap(td)
-        assert_allclose_td(loss_vmap, loss_no_vmap)
+        with pytest.raises(
+            NotImplementedError,
+            match="This implementation is not supported for torch<2.7",
+        ) if torch.__version__ < "2.7" else contextlib.nullcontext():
+            with _check_td_steady(td):
+                torch.manual_seed(1)
+                loss_no_vmap = loss_fn_no_vmap(td)
+            assert_allclose_td(loss_vmap, loss_no_vmap)
 
     @pytest.mark.parametrize("num_qvalue", [2])
     @pytest.mark.parametrize("device", get_default_devices())
@@ -7725,12 +7737,16 @@ def test_cql_deactivate_vmap(
         if td_est is not None:
             loss_fn_no_vmap.make_value_estimator(td_est)
 
-        with _check_td_steady(td), pytest.warns(
-            UserWarning, match="No target network updater"
-        ):
-            torch.manual_seed(1)
-            loss_no_vmap = loss_fn_no_vmap(td)
-        assert_allclose_td(loss_vmap, loss_no_vmap)
+        with pytest.raises(
+            NotImplementedError,
+            match="This implementation is not supported for torch<2.7",
+        ) if torch.__version__ < "2.7" else contextlib.nullcontext():
+            with _check_td_steady(td), pytest.warns(
+                UserWarning, match="No target network updater"
+            ):
+                torch.manual_seed(1)
+                loss_no_vmap = loss_fn_no_vmap(td)
+            assert_allclose_td(loss_vmap, loss_no_vmap)
 
     @pytest.mark.parametrize("delay_actor", (True,))
     @pytest.mark.parametrize("delay_qvalue", (True,))
@@ -12796,12 +12812,16 @@ def test_iql_deactivate_vmap(
         if td_est is not None:
             loss_fn_no_vmap.make_value_estimator(td_est)
 
-        with _check_td_steady(td), pytest.warns(
-            UserWarning, match="No target network updater"
-        ):
-            torch.manual_seed(1)
-            loss_no_vmap = loss_fn_no_vmap(td)
-        assert_allclose_td(loss_vmap, loss_no_vmap)
+        with pytest.raises(
+            NotImplementedError,
+            match="This implementation is not supported for torch<2.7",
+        ) if torch.__version__ < "2.7" else contextlib.nullcontext():
+            with _check_td_steady(td), pytest.warns(
+                UserWarning, match="No target network updater"
+            ):
+                torch.manual_seed(1)
+                loss_no_vmap = loss_fn_no_vmap(td)
+            assert_allclose_td(loss_vmap, loss_no_vmap)
 
     @pytest.mark.parametrize("num_qvalue", [2])
     @pytest.mark.parametrize("device", get_default_devices())
@@ -14507,10 +14527,14 @@ def test_gae_recurrent(self, module):
             shifted=False,
             deactivate_vmap=True,
         )
-        with set_recurrent_mode(True):
-            r1 = gae(vals.copy())
-        a1 = r1["advantage"]
-        torch.testing.assert_close(a0, a1)
+        with pytest.raises(
+            NotImplementedError,
+            match="This implementation is not supported for torch<2.7",
+        ) if torch.__version__ < "2.7" else contextlib.nullcontext():
+            with set_recurrent_mode(True):
+                r1 = gae(vals.copy())
+            a1 = r1["advantage"]
+            torch.testing.assert_close(a0, a1)
 
     @pytest.mark.parametrize("device", get_default_devices())
     @pytest.mark.parametrize("gamma", [0.1, 0.5, 0.99])
diff --git a/test/test_libs.py b/test/test_libs.py
@@ -2782,14 +2782,21 @@ class TestVmas:
     @pytest.mark.parametrize("scenario_name", VmasWrapper.available_envs)
     @pytest.mark.parametrize("continuous_actions", [True, False])
     def test_all_vmas_scenarios(self, scenario_name, continuous_actions):
+        # Skip football scenario due to VMAS bug: IndexError in get_wall_separations
+        if scenario_name == "football":
+            pytest.skip(
+                "Football scenario has a shape mismatch bug in VMAS get_wall_separations method"
+            )
+
         env = VmasEnv(
             scenario=scenario_name,
             continuous_actions=continuous_actions,
             num_envs=4,
         )
         env.set_seed(0)
-        env.reset()
-        env.rollout(10)
+        env.check_env_specs()
+        env.rollout(10, break_when_any_done=False)
+        env.check_env_specs()
         env.close()
 
     @pytest.mark.parametrize(
diff --git a/torchrl/_utils.py b/torchrl/_utils.py
@@ -449,8 +449,8 @@ class implement_for:
     def __init__(
         self,
         module_name: str | Callable,
-        from_version: str = None,
-        to_version: str = None,
+        from_version: str | None = None,
+        to_version: str | None = None,
         *,
         class_method: bool = False,
         compilable: bool = False,
diff --git a/torchrl/data/replay_buffers/storages.py b/torchrl/data/replay_buffers/storages.py
@@ -48,9 +48,9 @@
 )
 
 try:
-    from torch.compiler import is_compiling
+    from torch.compiler import disable as compile_disable, is_compiling
 except ImportError:
-    from torch._dynamo import is_compiling
+    from torch._dynamo import disable as compile_disable, is_compiling
 
 
 class Storage:
@@ -104,7 +104,6 @@ def _attached_entities(self) -> list:
         return _attached_entities_list
 
     # TODO: Check this
-    # @torch.compiler.disable()
     @torch._dynamo.assume_constant_result
     def _attached_entities_iter(self):
         return self._attached_entities
@@ -165,7 +164,7 @@ def _empty(self):
         ...
 
     # TODO: Without this disable, compiler recompiles due to changing len(self) guards.
-    @torch.compiler.disable()
+    @compile_disable()
     def _rand_given_ndim(self, batch_size):
         # a method to return random indices given the storage ndim
         if self.ndim == 1:
@@ -702,12 +701,12 @@ def shape(self):
 
     # TODO: Without this disable, compiler recompiles for back-to-back calls.
     # Figuring out a way to avoid this disable would give better performance.
-    @torch.compiler.disable()
+    @compile_disable()
     def _rand_given_ndim(self, batch_size):
         return self._rand_given_ndim_impl(batch_size)
 
     # At the moment, this is separated into its own function so that we can test
-    # it without the `torch._dynamo.disable` and detect if future updates to the
+    # it without the `disable` and detect if future updates to the
     # compiler fix the recompile issue.
     def _rand_given_ndim_impl(self, batch_size):
         if self.ndim == 1:
@@ -978,7 +977,7 @@ def get(self, index: int | Sequence[int] | slice) -> Any:
             return tree_map(lambda x: x[index], storage)
 
     # TODO: Without this disable, compiler recompiles due to changing _len_value guards.
-    @torch.compiler.disable()
+    @compile_disable()
     def __len__(self):
         return self._len
 
diff --git a/torchrl/data/replay_buffers/writers.py b/torchrl/data/replay_buffers/writers.py
@@ -21,6 +21,11 @@
 from torch import multiprocessing as mp
 from torchrl._utils import _STRDTYPE2DTYPE
 
+try:
+    from torch.compiler import disable as compile_disable
+except ImportError:
+    from torch._dynamo import disable as compile_disable
+
 try:
     from torch.utils._pytree import tree_leaves
 except ImportError:
@@ -221,7 +226,7 @@ def _empty(self, empty_write_count: bool = True) -> None:
 
     # TODO: Workaround for PyTorch nightly regression where compiler can't handle
     # method calls on objects returned from _attached_entities_iter()
-    @torch.compiler.disable()
+    @compile_disable()
     def _mark_update_entities(self, index: torch.Tensor) -> None:
         """Mark entities as updated with the given index."""
         for ent in self._storage._attached_entities_iter():
@@ -579,7 +584,7 @@ def extend(self, data: TensorDictBase) -> None:
 
     # TODO: Workaround for PyTorch nightly regression where compiler can't handle
     # method calls on objects returned from _attached_entities_iter()
-    @torch.compiler.disable()
+    @compile_disable()
     def _mark_update_entities(self, index: torch.Tensor) -> None:
         """Mark entities as updated with the given index."""
         for ent in self._storage._attached_entities_iter():
diff --git a/torchrl/objectives/utils.py b/torchrl/objectives/utils.py
@@ -27,6 +27,7 @@
         from functorch import vmap
     except ImportError as err_ft:
         raise err_ft from err
+from torchrl._utils import implement_for
 from torchrl.envs.utils import step_mdp
 
 try:
@@ -546,6 +547,7 @@ def decorated_module(*module_args_params):
             ) from err
 
 
+@implement_for("torch", "2.7")
 def _pseudo_vmap(
     func: Callable,
     in_dims: Any = 0,
@@ -581,6 +583,7 @@ def new_func(*args, in_dims=in_dims, out_dims=out_dims, **kwargs):
                 in_dims = (in_dims,) * len(args)
             if isinstance(out_dims, int):
                 out_dims = (out_dims,)
+
             vs = zip(*tuple(tree_map(_unbind, in_dims, args)))
             rs = []
             for v in vs:
@@ -597,6 +600,22 @@ def new_func(*args, in_dims=in_dims, out_dims=out_dims, **kwargs):
     return new_func
 
 
+@implement_for("torch", None, "2.7")
+def _pseudo_vmap(  # noqa: F811
+    func: Callable,
+    in_dims: Any = 0,
+    out_dims: Any = 0,
+    randomness: str | None = None,
+    *,
+    chunk_size=None,
+):
+    @functools.wraps(func)
+    def new_func(*args, in_dims=in_dims, out_dims=out_dims, **kwargs):
+        raise NotImplementedError("This implementation is not supported for torch<2.7")
+
+    return new_func
+
+
 def _reduce(
     tensor: torch.Tensor, reduction: str, mask: torch.Tensor | None = None
 ) -> float | torch.Tensor: