From 6588f85d68a67e8bc0fdb433d6ad6ec54a450d51 Mon Sep 17 00:00:00 2001 From: Kelly Guo Date: Mon, 10 Nov 2025 16:44:30 -0800 Subject: [PATCH 1/7] Adds RL device setting to allow CPU sim with a separate RL device --- scripts/demos/pick_and_place.py | 94 +++++++++++-------- .../reinforcement_learning/rl_games/play.py | 4 - .../reinforcement_learning/rl_games/train.py | 5 - .../reinforcement_learning/rsl_rl/train.py | 2 +- source/isaaclab_rl/config/extension.toml | 2 +- source/isaaclab_rl/docs/CHANGELOG.rst | 10 ++ .../isaaclab_rl/rl_games/rl_games.py | 4 + .../isaaclab_rl/rsl_rl/vecenv_wrapper.py | 31 +++++- 8 files changed, 98 insertions(+), 54 deletions(-) diff --git a/scripts/demos/pick_and_place.py b/scripts/demos/pick_and_place.py index cc14dcb0a72..ae3c1bd420d 100644 --- a/scripts/demos/pick_and_place.py +++ b/scripts/demos/pick_and_place.py @@ -11,6 +11,7 @@ # add argparse arguments parser = argparse.ArgumentParser(description="Keyboard control for Isaac Lab Pick and Place.") +parser.add_argument("--num_envs", type=int, default=32, help="Number of environments to spawn.") # append AppLauncher cli args AppLauncher.add_app_launcher_args(parser) # parse the arguments @@ -59,11 +60,16 @@ class PickAndPlaceEnvCfg(DirectRLEnvCfg): action_space = 4 observation_space = 6 state_space = 0 - device = "cpu" - # Simulation cfg. Note that we are forcing the simulation to run on CPU. - # This is because the surface gripper API is only supported on CPU backend for now. - sim: SimulationCfg = SimulationCfg(dt=1 / 60, render_interval=decimation, device="cpu") + # Simulation cfg. Surface grippers are currently only supported on CPU. + # Surface grippers also require scene query support to function. + sim: SimulationCfg = SimulationCfg( + dt=1 / 60, + device="cpu", + render_interval=decimation, + use_fabric=True, + enable_scene_query_support=True, + ) debug_vis = True # robot @@ -136,8 +142,8 @@ def __init__(self, cfg: PickAndPlaceEnvCfg, render_mode: str | None = None, **kw self.joint_vel = self.pick_and_place.data.joint_vel # Buffers - self.go_to_cube = False - self.go_to_target = False + self.go_to_cube = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + self.go_to_target = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) self.target_pos = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float32) self.instant_controls = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float32) self.permanent_controls = torch.zeros((self.num_envs, 1), device=self.device, dtype=torch.float32) @@ -173,35 +179,36 @@ def set_up_keyboard(self): print("Keyboard set up!") print("The simulation is ready for you to try it out!") print("Your goal is pick up the purple cube and to drop it on the red sphere!") - print("Use the following controls to interact with the simulation:") - print("Press the 'A' key to have the gripper track the cube position.") - print("Press the 'D' key to have the gripper track the target position") - print("Press the 'W' or 'S' keys to move the gantry UP or DOWN respectively") - print("Press 'Q' or 'E' to OPEN or CLOSE the gripper respectively") + print(f"Number of environments: {self.num_envs}") + print("Use the following controls to interact with ALL environments simultaneously:") + print("Press the 'A' key to have all grippers track the cube position.") + print("Press the 'D' key to have all grippers track the target position") + print("Press the 'W' or 'S' keys to move all gantries UP or DOWN respectively") + print("Press 'Q' or 'E' to OPEN or CLOSE all 
grippers respectively") def _on_keyboard_event(self, event): """Checks for a keyboard event and assign the corresponding command control depending on key pressed.""" if event.type == carb.input.KeyboardEventType.KEY_PRESS: - # Logic on key press + # Logic on key press - apply to ALL environments if event.input.name == self._auto_aim_target: - self.go_to_target = True - self.go_to_cube = False + self.go_to_target[:] = True + self.go_to_cube[:] = False if event.input.name == self._auto_aim_cube: - self.go_to_cube = True - self.go_to_target = False + self.go_to_cube[:] = True + self.go_to_target[:] = False if event.input.name in self._instant_key_controls: - self.go_to_cube = False - self.go_to_target = False - self.instant_controls[0] = self._instant_key_controls[event.input.name] + self.go_to_cube[:] = False + self.go_to_target[:] = False + self.instant_controls[:] = self._instant_key_controls[event.input.name] if event.input.name in self._permanent_key_controls: - self.go_to_cube = False - self.go_to_target = False - self.permanent_controls[0] = self._permanent_key_controls[event.input.name] - # On key release, the robot stops moving + self.go_to_cube[:] = False + self.go_to_target[:] = False + self.permanent_controls[:] = self._permanent_key_controls[event.input.name] + # On key release, all robots stop moving elif event.type == carb.input.KeyboardEventType.KEY_RELEASE: - self.go_to_cube = False - self.go_to_target = False - self.instant_controls[0] = self._instant_key_controls["ZEROS"] + self.go_to_cube[:] = False + self.go_to_target[:] = False + self.instant_controls[:] = self._instant_key_controls["ZEROS"] def _setup_scene(self): self.pick_and_place = Articulation(self.cfg.robot_cfg) @@ -225,28 +232,30 @@ def _pre_physics_step(self, actions: torch.Tensor) -> None: def _apply_action(self) -> None: # We use the keyboard outputs as an action. 
- if self.go_to_cube: + # Process each environment independently + if self.go_to_cube.any(): # Effort based proportional controller to track the cube position - head_pos_x = self.pick_and_place.data.joint_pos[:, self._x_dof_idx[0]] - head_pos_y = self.pick_and_place.data.joint_pos[:, self._y_dof_idx[0]] - cube_pos_x = self.cube.data.root_pos_w[:, 0] - self.scene.env_origins[:, 0] - cube_pos_y = self.cube.data.root_pos_w[:, 1] - self.scene.env_origins[:, 1] + head_pos_x = self.pick_and_place.data.joint_pos[self.go_to_cube, self._x_dof_idx[0]] + head_pos_y = self.pick_and_place.data.joint_pos[self.go_to_cube, self._y_dof_idx[0]] + cube_pos_x = self.cube.data.root_pos_w[self.go_to_cube, 0] - self.scene.env_origins[self.go_to_cube, 0] + cube_pos_y = self.cube.data.root_pos_w[self.go_to_cube, 1] - self.scene.env_origins[self.go_to_cube, 1] d_cube_robot_x = cube_pos_x - head_pos_x d_cube_robot_y = cube_pos_y - head_pos_y - self.instant_controls[0] = torch.tensor( + self.instant_controls[self.go_to_cube] = torch.tensor( [d_cube_robot_x * 5.0, d_cube_robot_y * 5.0, 0.0], device=self.device ) - elif self.go_to_target: + elif self.go_to_target.any(): # Effort based proportional controller to track the target position - head_pos_x = self.pick_and_place.data.joint_pos[:, self._x_dof_idx[0]] - head_pos_y = self.pick_and_place.data.joint_pos[:, self._y_dof_idx[0]] - target_pos_x = self.target_pos[:, 0] - target_pos_y = self.target_pos[:, 1] + head_pos_x = self.pick_and_place.data.joint_pos[self.go_to_target, self._x_dof_idx[0]] + head_pos_y = self.pick_and_place.data.joint_pos[self.go_to_target, self._y_dof_idx[0]] + target_pos_x = self.target_pos[self.go_to_target, 0] + target_pos_y = self.target_pos[self.go_to_target, 1] d_target_robot_x = target_pos_x - head_pos_x d_target_robot_y = target_pos_y - head_pos_y - self.instant_controls[0] = torch.tensor( + self.instant_controls[self.go_to_target] = torch.tensor( [d_target_robot_x * 5.0, d_target_robot_y * 5.0, 0.0], device=self.device ) + # Set the joint effort targets for the picker self.pick_and_place.set_joint_effort_target( self.instant_controls[:, 0].unsqueeze(dim=1), joint_ids=self._x_dof_idx @@ -258,7 +267,7 @@ def _apply_action(self) -> None: self.permanent_controls[:, 0].unsqueeze(dim=1), joint_ids=self._z_dof_idx ) # Set the gripper command - self.gripper.set_grippers_command(self.instant_controls[:, 2].unsqueeze(dim=1)) + self.gripper.set_grippers_command(self.instant_controls[:, 2]) def _get_observations(self) -> dict: # Get the observations @@ -397,8 +406,11 @@ def _debug_vis_callback(self, event): def main(): """Main function.""" + # create environment configuration + env_cfg = PickAndPlaceEnvCfg() + env_cfg.scene.num_envs = args_cli.num_envs # create environment - pick_and_place = PickAndPlaceEnv(PickAndPlaceEnvCfg()) + pick_and_place = PickAndPlaceEnv(env_cfg) obs, _ = pick_and_place.reset() while simulation_app.is_running(): # check for selected robots @@ -409,4 +421,4 @@ def main(): if __name__ == "__main__": main() - simulation_app.close() + simulation_app.close() \ No newline at end of file diff --git a/scripts/reinforcement_learning/rl_games/play.py b/scripts/reinforcement_learning/rl_games/play.py index d6faec37316..135980e92c7 100644 --- a/scripts/reinforcement_learning/rl_games/play.py +++ b/scripts/reinforcement_learning/rl_games/play.py @@ -95,10 +95,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # override configurations with non-hydra CLI arguments env_cfg.scene.num_envs = 
args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device - # update agent device to match simulation device - if args_cli.device is not None: - agent_cfg["params"]["config"]["device"] = args_cli.device - agent_cfg["params"]["config"]["device_name"] = args_cli.device # randomly sample a seed if seed = -1 if args_cli.seed == -1: diff --git a/scripts/reinforcement_learning/rl_games/train.py b/scripts/reinforcement_learning/rl_games/train.py index 634e5975676..d6900a3789f 100644 --- a/scripts/reinforcement_learning/rl_games/train.py +++ b/scripts/reinforcement_learning/rl_games/train.py @@ -102,11 +102,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen "Please use GPU device (e.g., --device cuda) for distributed training." ) - # update agent device to match simulation device - if args_cli.device is not None: - agent_cfg["params"]["config"]["device"] = args_cli.device - agent_cfg["params"]["config"]["device_name"] = args_cli.device - # randomly sample a seed if seed = -1 if args_cli.seed == -1: args_cli.seed = random.randint(0, 10000) diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py index 8b66feb28aa..ad739f4559a 100644 --- a/scripts/reinforcement_learning/rsl_rl/train.py +++ b/scripts/reinforcement_learning/rsl_rl/train.py @@ -182,7 +182,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen env = gym.wrappers.RecordVideo(env, **video_kwargs) # wrap around environment for rsl-rl - env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) + env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions, rl_device=agent_cfg.device) # create runner from rsl-rl if agent_cfg.class_name == "OnPolicyRunner": diff --git a/source/isaaclab_rl/config/extension.toml b/source/isaaclab_rl/config/extension.toml index 0e2f31470b6..35ce2649060 100644 --- a/source/isaaclab_rl/config/extension.toml +++ b/source/isaaclab_rl/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.4.4" +version = "0.5.0" # Description title = "Isaac Lab RL" diff --git a/source/isaaclab_rl/docs/CHANGELOG.rst b/source/isaaclab_rl/docs/CHANGELOG.rst index e3d44a08d96..3698bf770e0 100644 --- a/source/isaaclab_rl/docs/CHANGELOG.rst +++ b/source/isaaclab_rl/docs/CHANGELOG.rst @@ -1,6 +1,16 @@ Changelog --------- +0.5.0 (2025-11-10) +~~~~~~~~~~~~~~~~~~ + +Added +^^^^^ + +* Added support for decoupling RL device from simulation device for the RL wrappers. + This allows users to run simulation on one device (e.g., CPU) while running RL training/inference on another device. + + 0.4.4 (2025-10-15) ~~~~~~~~~~~~~~~~~~ diff --git a/source/isaaclab_rl/isaaclab_rl/rl_games/rl_games.py b/source/isaaclab_rl/isaaclab_rl/rl_games/rl_games.py index 8c448c172ac..22df1e8bef4 100644 --- a/source/isaaclab_rl/isaaclab_rl/rl_games/rl_games.py +++ b/source/isaaclab_rl/isaaclab_rl/rl_games/rl_games.py @@ -319,6 +319,10 @@ def _process_obs(self, obs_dict: VecEnvObs) -> dict[str, torch.Tensor] | dict[st - ``"obs"``: either a concatenated tensor (``concate_obs_group=True``) or a Dict of group tensors. - ``"states"`` (optional): same structure as above when state groups are configured; omitted otherwise. 
""" + # move observations to RL device if different from sim device + if self._rl_device != self._sim_device: + obs_dict = {key: obs.to(device=self._rl_device) for key, obs in obs_dict.items()} + # clip the observations for key, obs in obs_dict.items(): obs_dict[key] = torch.clamp(obs, -self._clip_obs, self._clip_obs) diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py index 73ceae04693..10377f84bd6 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py @@ -24,7 +24,7 @@ class RslRlVecEnvWrapper(VecEnv): https://github.com/leggedrobotics/rsl_rl/blob/master/rsl_rl/env/vec_env.py """ - def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | None = None): + def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | None = None, rl_device: str | None = None): """Initializes the wrapper. Note: @@ -33,6 +33,8 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N Args: env: The environment to wrap around. clip_actions: The clipping value for actions. If ``None``, then no clipping is done. + rl_device: The device for RL agent/policy. If ``None``, uses the environment device. + This allows running the RL agent on a different device than the environment. Raises: ValueError: When the environment is not an instance of :class:`ManagerBasedRLEnv` or :class:`DirectRLEnv`. @@ -49,11 +51,21 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N self.env = env self.clip_actions = clip_actions + # store the RL device (where policy/training happens) + # this may be different from env.device (where task buffers are) + if rl_device is None: + self.rl_device = self.unwrapped.device + else: + self.rl_device = rl_device + # store information required by wrapper self.num_envs = self.unwrapped.num_envs - self.device = self.unwrapped.device + self.device = self.rl_device self.max_episode_length = self.unwrapped.max_episode_length + # track the environment device separately + self.env_device = self.unwrapped.device + # obtain dimensions of the environment if hasattr(self.unwrapped, "action_manager"): self.num_actions = self.unwrapped.action_manager.total_action_dim @@ -139,6 +151,9 @@ def seed(self, seed: int = -1) -> int: # noqa: D102 def reset(self) -> tuple[TensorDict, dict]: # noqa: D102 # reset the environment obs_dict, extras = self.env.reset() + # move observations to RL device if different from env device + if self.rl_device != self.env_device: + obs_dict = {k: v.to(self.rl_device) if isinstance(v, torch.Tensor) else v for k, v in obs_dict.items()} return TensorDict(obs_dict, batch_size=[self.num_envs]), extras def get_observations(self) -> TensorDict: @@ -147,14 +162,26 @@ def get_observations(self) -> TensorDict: obs_dict = self.unwrapped.observation_manager.compute() else: obs_dict = self.unwrapped._get_observations() + # move observations to RL device if different from env device + if self.rl_device != self.env_device: + obs_dict = {k: v.to(self.rl_device) if isinstance(v, torch.Tensor) else v for k, v in obs_dict.items()} return TensorDict(obs_dict, batch_size=[self.num_envs]) def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.Tensor, dict]: + # move actions to env device if coming from different RL device + if self.rl_device != self.env_device: + actions = actions.to(self.env_device) # clip actions if self.clip_actions is not 
None: actions = torch.clamp(actions, -self.clip_actions, self.clip_actions) # record step information obs_dict, rew, terminated, truncated, extras = self.env.step(actions) + # move outputs to RL device if different from env device + if self.rl_device != self.env_device: + obs_dict = {k: v.to(self.rl_device) if isinstance(v, torch.Tensor) else v for k, v in obs_dict.items()} + rew = rew.to(self.rl_device) + terminated = terminated.to(self.rl_device) + truncated = truncated.to(self.rl_device) # compute dones for compatibility with RSL-RL dones = (terminated | truncated).to(dtype=torch.long) # move time out information to the extras dict From ee5a07eeeb7ecf97dcad455dce4e837eaf2712fa Mon Sep 17 00:00:00 2001 From: Kelly Guo Date: Mon, 10 Nov 2025 16:51:16 -0800 Subject: [PATCH 2/7] add test case --- scripts/demos/pick_and_place.py | 12 +- .../isaaclab_rl/rsl_rl/vecenv_wrapper.py | 4 +- .../test/test_rl_device_separation.py | 680 ++++++++++++++++++ 3 files changed, 689 insertions(+), 7 deletions(-) create mode 100644 source/isaaclab_tasks/test/test_rl_device_separation.py diff --git a/scripts/demos/pick_and_place.py b/scripts/demos/pick_and_place.py index ae3c1bd420d..249059c61f4 100644 --- a/scripts/demos/pick_and_place.py +++ b/scripts/demos/pick_and_place.py @@ -241,8 +241,8 @@ def _apply_action(self) -> None: cube_pos_y = self.cube.data.root_pos_w[self.go_to_cube, 1] - self.scene.env_origins[self.go_to_cube, 1] d_cube_robot_x = cube_pos_x - head_pos_x d_cube_robot_y = cube_pos_y - head_pos_y - self.instant_controls[self.go_to_cube] = torch.tensor( - [d_cube_robot_x * 5.0, d_cube_robot_y * 5.0, 0.0], device=self.device + self.instant_controls[self.go_to_cube] = torch.stack( + [d_cube_robot_x * 5.0, d_cube_robot_y * 5.0, torch.zeros_like(d_cube_robot_x)], dim=1 ) elif self.go_to_target.any(): # Effort based proportional controller to track the target position @@ -252,10 +252,10 @@ def _apply_action(self) -> None: target_pos_y = self.target_pos[self.go_to_target, 1] d_target_robot_x = target_pos_x - head_pos_x d_target_robot_y = target_pos_y - head_pos_y - self.instant_controls[self.go_to_target] = torch.tensor( - [d_target_robot_x * 5.0, d_target_robot_y * 5.0, 0.0], device=self.device + self.instant_controls[self.go_to_target] = torch.stack( + [d_target_robot_x * 5.0, d_target_robot_y * 5.0, torch.zeros_like(d_target_robot_x)], dim=1 ) - + # Set the joint effort targets for the picker self.pick_and_place.set_joint_effort_target( self.instant_controls[:, 0].unsqueeze(dim=1), joint_ids=self._x_dof_idx @@ -421,4 +421,4 @@ def main(): if __name__ == "__main__": main() - simulation_app.close() \ No newline at end of file + simulation_app.close() diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py index 10377f84bd6..784892f7e37 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py @@ -24,7 +24,9 @@ class RslRlVecEnvWrapper(VecEnv): https://github.com/leggedrobotics/rsl_rl/blob/master/rsl_rl/env/vec_env.py """ - def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | None = None, rl_device: str | None = None): + def __init__( + self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | None = None, rl_device: str | None = None + ): """Initializes the wrapper. 
Note: diff --git a/source/isaaclab_tasks/test/test_rl_device_separation.py b/source/isaaclab_tasks/test/test_rl_device_separation.py new file mode 100644 index 00000000000..ec3f7060877 --- /dev/null +++ b/source/isaaclab_tasks/test/test_rl_device_separation.py @@ -0,0 +1,680 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Test RL device separation across all supported RL libraries. + +This test verifies that RL library wrappers correctly handle device transfers when the +simulation device differs from the RL training device. + +Device Architecture: + 1. sim_device: Where physics simulation runs and environment buffers live + 2. rl_device: Where policy networks and training computations occur + +Test Scenarios: + - GPU simulation + GPU RL: Same device (no transfers needed, optimal performance) + - GPU simulation + CPU RL: Cross-device transfers (wrapper handles transfers) + - CPU simulation + CPU RL: CPU-only operation + +Each test verifies the wrapper correctly: + 1. Unwrapped env: operates entirely on sim_device + 2. Wrapper: accepts actions on rl_device (where policy generates them) + 3. Wrapper: internally transfers actions from rl_device → sim_device for env.step() + 4. Wrapper: transfers outputs from sim_device → rl_device (for policy to use) + +Tested Libraries: + - RSL-RL: TensorDict observations, explicit rl_device parameter + * Transfers observations and rewards to rl_device + - RL Games: Dict observations, explicit rl_device parameter + * Transfers observations and rewards to rl_device + - Stable-Baselines3: Numpy arrays (CPU-only by design) + * Always converts to/from numpy on CPU + - skrl: Dict observations, uses skrl.config.torch.device for RL device + * Keeps observations on sim_device (policy handles transfer) + * Only transfers actions from rl_device to sim_device + +IMPORTANT: Due to Isaac Sim limitations, only ONE test can be run per pytest invocation. +Run tests individually: + pytest test_rl_device_separation.py::test_rsl_rl_device_separation_gpu_to_gpu -v -s + pytest test_rl_device_separation.py::test_rsl_rl_device_separation_gpu_to_cpu -v -s + pytest test_rl_device_separation.py::test_rl_games_device_separation_gpu_to_gpu -v -s + ... +""" + +from isaaclab.app import AppLauncher + +# launch the simulator +app_launcher = AppLauncher(headless=True) +simulation_app = app_launcher.app + +"""Rest everything follows.""" + +import gymnasium as gym +import torch + +import carb +import omni.usd +import pytest + +import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils.parse_cfg import parse_env_cfg + +# Test environment - use Cartpole as it's simple and fast +TEST_ENV = "Isaac-Cartpole-v0" +NUM_ENVS = 4 + + +def _test_rsl_rl_device_separation(sim_device: str, rl_device: str): + """Helper function to test RSL-RL with specified device configuration. 
+ + Args: + sim_device: Device for simulation (e.g., "cuda:0", "cpu") + rl_device: Device for RL agent (e.g., "cuda:0", "cpu") + """ + from tensordict import TensorDict + + from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper + + print(f"\n{'=' * 60}") + print(f">>> Testing RSL-RL with sim_device={sim_device}, rl_device={rl_device}") + print(f"{'=' * 60}") + + # Create a new stage + omni.usd.get_context().new_stage() + # Reset the rtx sensors carb setting to False + carb.settings.get_settings().set_bool("/isaaclab/render/rtx_sensors", False) + + try: + # Parse environment config + print(" [1/6] Parsing environment config...") + env_cfg = parse_env_cfg(TEST_ENV, device=sim_device, num_envs=NUM_ENVS) + + # Create environment + print(" [2/6] Creating environment (may take 5-10s)...") + env = gym.make(TEST_ENV, cfg=env_cfg) + print(" [2/6] Environment created successfully") + except Exception as e: + # Try to close environment on exception + if "env" in locals() and hasattr(env, "_is_closed"): + env.close() + else: + if hasattr(e, "obj") and hasattr(e.obj, "_is_closed"): + e.obj.close() + pytest.fail(f"Failed to set-up the environment for task {TEST_ENV}. Error: {e}") + + # Disable control on stop + env.unwrapped.sim._app_control_on_stop_handle = None + + # Verify environment device + print(" [3/6] Verifying environment device...") + assert ( + env.unwrapped.device == sim_device + ), f"Environment device mismatch: expected {sim_device}, got {env.unwrapped.device}" + + # Test environment directly before wrapping to verify it returns data on sim device + print(" [3/6] Testing unwrapped environment returns data on sim_device...") + obs_dict, _ = env.reset() + for key, value in obs_dict.items(): + if isinstance(value, torch.Tensor): + assert ( + value.device.type == torch.device(sim_device).type + ), f"Unwrapped env obs '{key}' should be on {sim_device}, got {value.device}" + + # Step unwrapped environment to verify outputs are on sim device + action_space = env.unwrapped.single_action_space + test_action = torch.zeros(NUM_ENVS, action_space.shape[0], device=sim_device) + obs_dict, rew, term, trunc, extras = env.step(test_action) + assert ( + rew.device.type == torch.device(sim_device).type + ), f"Unwrapped env rewards should be on {sim_device}, got {rew.device}" + assert ( + term.device.type == torch.device(sim_device).type + ), f"Unwrapped env terminated should be on {sim_device}, got {term.device}" + print(f" [3/6] Verified: Unwrapped environment returns data on {sim_device}") + + # Create RSL-RL wrapper with RL device + print(" [4/6] Creating RSL-RL wrapper...") + env = RslRlVecEnvWrapper(env, rl_device=rl_device) + print(f" [4/6] Wrapper created (env_device={env.env_device}, rl_device={env.rl_device})") + + # Verify devices + assert env.env_device == sim_device, f"Wrapper env_device should be {sim_device}" + assert env.rl_device == rl_device, f"Wrapper RL device should be {rl_device}" + assert env.device == rl_device, f"Wrapper device property should be {rl_device}" + + # Reset and step to test device transfers + print(" [5/6] Testing reset and step operations...") + obs, extras = env.reset() + print(" [5/6] Reset completed") + + # Verify observations are on RL device (RSL-RL returns TensorDict) + assert isinstance(obs, TensorDict), f"Expected TensorDict, got {type(obs)}" + for key, value in obs.items(): + if isinstance(value, torch.Tensor): + assert ( + value.device.type == torch.device(rl_device).type + ), f"Observation '{key}' should be on {rl_device}, got {value.device}" + + # Sample random 
action on RL device (simulating policy output) + # RSL-RL: action_space.shape is (num_envs, action_dim) + action = 2 * torch.rand(env.action_space.shape, device=rl_device) - 1 + print(f" [5/6] Action created on rl_device: {action.device}, shape: {action.shape}") + + # Verify action is on RL device before calling step + assert ( + action.device.type == torch.device(rl_device).type + ), f"Action should be on {rl_device} before step, got {action.device}" + + # Step environment - wrapper should: + # 1. Accept action on rl_device + # 2. Transfer action from rl_device to sim_device internally + # 3. Call unwrapped env.step() with action on sim_device + # 4. Transfer outputs from sim_device to rl_device + obs, reward, dones, extras = env.step(action) + print(" [5/6] Step completed - wrapper handled device transfers") + + # Verify all outputs are on RL device (wrapper transferred from sim_device) + print(" [6/6] Verifying device transfers...") + assert isinstance(obs, TensorDict), f"Expected TensorDict, got {type(obs)}" + for key, value in obs.items(): + if isinstance(value, torch.Tensor): + assert ( + value.device.type == torch.device(rl_device).type + ), f"Step observation '{key}' should be on {rl_device}, got {value.device}" + assert reward.device.type == torch.device(rl_device).type, f"Rewards should be on {rl_device}, got {reward.device}" + assert dones.device.type == torch.device(rl_device).type, f"Dones should be on {rl_device}, got {dones.device}" + + # Cleanup + print(" [6/6] Cleaning up environment...") + env.close() + print(f"✓ RSL-RL test PASSED for sim_device={sim_device}, rl_device={rl_device}") + print(" Wrapper device transfer verified:") + print(f" 1. Unwrapped env: expects actions on {sim_device}, returns data on {sim_device}") + print(f" 2. Wrapper: accepts actions on {rl_device} (from policy)") + print(f" 3. Wrapper: internally transfers actions to {sim_device} for env.step()") + print(f" 4. Wrapper: transfers outputs from {sim_device} to {rl_device} (for policy)") + print("-" * 80) + + +def _test_rl_games_device_separation(sim_device: str, rl_device: str): + """Helper function to test RL Games with specified device configuration. + + Args: + sim_device: Device for simulation (e.g., "cuda:0", "cpu") + rl_device: Device for RL agent (e.g., "cuda:0", "cpu") + """ + from isaaclab_rl.rl_games import RlGamesVecEnvWrapper + + print(f"\n{'=' * 60}") + print(f">>> Testing RL Games with sim_device={sim_device}, rl_device={rl_device}") + print(f"{'=' * 60}") + + # Create a new stage + omni.usd.get_context().new_stage() + # Reset the rtx sensors carb setting to False + carb.settings.get_settings().set_bool("/isaaclab/render/rtx_sensors", False) + + try: + # Parse environment config + print(" [1/5] Parsing environment config...") + env_cfg = parse_env_cfg(TEST_ENV, device=sim_device, num_envs=NUM_ENVS) + + # Create environment + print(" [2/5] Creating environment (may take 5-10s)...") + env = gym.make(TEST_ENV, cfg=env_cfg) + print(" [2/5] Environment created successfully") + except Exception as e: + # Try to close environment on exception + if "env" in locals() and hasattr(env, "_is_closed"): + env.close() + else: + if hasattr(e, "obj") and hasattr(e.obj, "_is_closed"): + e.obj.close() + pytest.fail(f"Failed to set-up the environment for task {TEST_ENV}. 
Error: {e}") + + # Disable control on stop + env.unwrapped.sim._app_control_on_stop_handle = None + + # Verify environment device + print(" [3/5] Verifying environment device...") + assert ( + env.unwrapped.device == sim_device + ), f"Environment device mismatch: expected {sim_device}, got {env.unwrapped.device}" + + # Test environment directly before wrapping to verify it returns data on sim device + print(" [3/5] Testing unwrapped environment returns data on sim_device...") + obs_dict, _ = env.reset() + for key, value in obs_dict.items(): + if isinstance(value, torch.Tensor): + assert ( + value.device.type == torch.device(sim_device).type + ), f"Unwrapped env obs '{key}' should be on {sim_device}, got {value.device}" + + # Step unwrapped environment to verify outputs are on sim device + action_space = env.unwrapped.single_action_space + test_action = torch.zeros(NUM_ENVS, action_space.shape[0], device=sim_device) + obs_dict, rew, term, trunc, extras = env.step(test_action) + assert ( + rew.device.type == torch.device(sim_device).type + ), f"Unwrapped env rewards should be on {sim_device}, got {rew.device}" + assert ( + term.device.type == torch.device(sim_device).type + ), f"Unwrapped env terminated should be on {sim_device}, got {term.device}" + print(f" [3/5] Verified: Unwrapped environment returns data on {sim_device}") + + # Create RL Games wrapper with RL device + print(" [3/5] Creating RL Games wrapper...") + env = RlGamesVecEnvWrapper(env, rl_device=rl_device, clip_obs=10.0, clip_actions=1.0) + + # Reset and step to test device transfers + print(" [4/5] Testing reset and step operations...") + obs = env.reset() + print(" [4/5] Reset completed") + + # Verify observations are on RL device + if isinstance(obs, dict): + for key, value in obs.items(): + assert ( + value.device.type == torch.device(rl_device).type + ), f"Observation '{key}' should be on {rl_device}, got {value.device}" + else: + assert ( + obs.device.type == torch.device(rl_device).type + ), f"Observation should be on {rl_device}, got {obs.device}" + + # Sample random action on RL device (simulating policy output) + action = 2 * torch.rand(NUM_ENVS, *env.action_space.shape, device=rl_device) - 1 + print(f" [4/5] Action created on rl_device: {action.device}, shape: {action.shape}") + + # Verify action is on RL device before calling step + assert ( + action.device.type == torch.device(rl_device).type + ), f"Action should be on {rl_device} before step, got {action.device}" + + # Step environment - wrapper should: + # 1. Accept action on rl_device + # 2. Transfer action from rl_device to sim_device internally + # 3. Call unwrapped env.step() with action on sim_device + # 4. 
Transfer outputs from sim_device to rl_device + obs, reward, dones, info = env.step(action) + print(" [4/5] Step completed - wrapper handled device transfers") + + # Verify all outputs are on RL device (wrapper transferred from sim_device) + print(" [5/5] Verifying device transfers...") + # RL Games returns flat tensor for observations + if isinstance(obs, dict): + for key, value in obs.items(): + assert ( + value.device.type == torch.device(rl_device).type + ), f"Observation '{key}' should be on {rl_device}, got {value.device}" + else: + assert ( + obs.device.type == torch.device(rl_device).type + ), f"Observations should be on {rl_device}, got {obs.device}" + assert reward.device.type == torch.device(rl_device).type, f"Rewards should be on {rl_device}, got {reward.device}" + assert dones.device.type == torch.device(rl_device).type, f"Dones should be on {rl_device}, got {dones.device}" + + # Cleanup + print(" [5/5] Cleaning up environment...") + env.close() + print(f"✓ RL Games test PASSED for sim_device={sim_device}, rl_device={rl_device}") + print(" Wrapper device transfer verified:") + print(f" 1. Unwrapped env: expects actions on {sim_device}, returns data on {sim_device}") + print(f" 2. Wrapper: accepts actions on {rl_device} (from policy)") + print(f" 3. Wrapper: internally transfers actions to {sim_device} for env.step()") + print(f" 4. Wrapper: transfers outputs from {sim_device} to {rl_device} (for policy)") + print("-" * 80) + + +def _test_sb3_device_separation(sim_device: str): + """Helper function to test Stable-Baselines3 with specified device configuration. + + Note: SB3 always converts to CPU/numpy, so we don't test rl_device parameter. + + Args: + sim_device: Device for simulation (e.g., "cuda:0", "cpu") + """ + import numpy as np + + from isaaclab_rl.sb3 import Sb3VecEnvWrapper + + print(f"\n{'=' * 60}") + print(f">>> Testing SB3 with sim_device={sim_device}") + print(f"{'=' * 60}") + + # Create a new stage + omni.usd.get_context().new_stage() + # Reset the rtx sensors carb setting to False + carb.settings.get_settings().set_bool("/isaaclab/render/rtx_sensors", False) + + try: + # Parse environment config + print(" [1/5] Parsing environment config...") + env_cfg = parse_env_cfg(TEST_ENV, device=sim_device, num_envs=NUM_ENVS) + + # Create environment + print(" [2/5] Creating environment (may take 5-10s)...") + env = gym.make(TEST_ENV, cfg=env_cfg) + print(" [2/5] Environment created successfully") + except Exception as e: + # Try to close environment on exception + if "env" in locals() and hasattr(env, "_is_closed"): + env.close() + else: + if hasattr(e, "obj") and hasattr(e.obj, "_is_closed"): + e.obj.close() + pytest.fail(f"Failed to set-up the environment for task {TEST_ENV}. 
Error: {e}") + + # Disable control on stop + env.unwrapped.sim._app_control_on_stop_handle = None + + # Verify environment device + print(" [3/5] Verifying environment device...") + assert ( + env.unwrapped.device == sim_device + ), f"Environment device mismatch: expected {sim_device}, got {env.unwrapped.device}" + + # Test environment directly before wrapping to verify it returns data on sim device + print(" [3/5] Testing unwrapped environment returns data on sim_device...") + obs_dict, _ = env.reset() + for key, value in obs_dict.items(): + if isinstance(value, torch.Tensor): + assert ( + value.device.type == torch.device(sim_device).type + ), f"Unwrapped env obs '{key}' should be on {sim_device}, got {value.device}" + print(f" [3/5] Verified: Unwrapped environment returns data on {sim_device}") + + # Create SB3 wrapper (always converts to numpy/CPU) + print(" [3/5] Creating SB3 wrapper...") + env = Sb3VecEnvWrapper(env) + + # Reset and step to test device transfers + print(" [4/5] Testing reset and step operations...") + obs = env.reset() + print(" [4/5] Reset completed") + + # SB3 observations should always be numpy arrays (on CPU) + assert isinstance(obs, np.ndarray), f"SB3 observations should be numpy arrays, got {type(obs)}" + + # Sample random action (SB3 uses numpy) + action = 2 * np.random.rand(env.num_envs, *env.action_space.shape) - 1 + assert isinstance(action, np.ndarray), f"Action should be numpy array, got {type(action)}" + print(f" [4/5] Action sampled (numpy array), shape: {action.shape}") + + # Step environment - wrapper should: + # 1. Convert numpy action to torch tensor on sim_device internally + # 2. Call unwrapped env.step() with action on sim_device + # 3. Convert outputs from sim_device tensors to numpy arrays + obs, reward, done, info = env.step(action) + print(" [4/5] Step completed, outputs converted to numpy") + + # Verify all outputs are numpy arrays (wrapper transferred and converted) + print(" [5/5] Verifying numpy conversions...") + assert isinstance(obs, np.ndarray), f"Observations should be numpy arrays, got {type(obs)}" + assert isinstance(reward, np.ndarray), f"Rewards should be numpy arrays, got {type(reward)}" + assert isinstance(done, np.ndarray), f"Dones should be numpy arrays, got {type(done)}" + + # Cleanup + print(" [5/5] Cleaning up environment...") + env.close() + print(f"✓ SB3 test PASSED for sim_device={sim_device}") + print(" Wrapper device transfer verified:") + print(f" 1. Unwrapped env: expects actions on {sim_device}, returns data on {sim_device}") + print(" 2. Wrapper: accepts numpy arrays (from policy on CPU)") + print(f" 3. Wrapper: internally converts to tensors on {sim_device} for env.step()") + print(f" 4. Wrapper: converts outputs from {sim_device} tensors to numpy arrays (for policy)") + print("-" * 80) + + +def _test_skrl_device_separation(sim_device: str, rl_device: str): + """Helper function to test skrl with specified device configuration. + + Note: skrl uses skrl.config.torch.device for device configuration. + This can be set via agent_cfg["device"] for consistency with other libraries. 
+ + Args: + sim_device: Device for simulation (e.g., "cuda:0", "cpu") + rl_device: Device for RL agent (e.g., "cuda:0", "cpu") - set via skrl.config.torch.device + """ + try: + import skrl + from skrl.envs.wrappers.torch import wrap_env + except ImportError: + pytest.skip("skrl not installed") + + print(f"\n{'=' * 60}") + print(f">>> Testing skrl with sim_device={sim_device}, rl_device={rl_device}") + print(f" Using skrl.config.torch.device = {rl_device}") + print(f"{'=' * 60}") + + # Create agent config with device parameter (for demonstration/consistency) + agent_cfg = {"device": rl_device} + + # Configure skrl device (can be set from agent_cfg for consistency with other libraries) + if "device" in agent_cfg: + skrl.config.torch.device = torch.device(agent_cfg["device"]) + else: + skrl.config.torch.device = torch.device(rl_device) + + # Create a new stage + omni.usd.get_context().new_stage() + # Reset the rtx sensors carb setting to False + carb.settings.get_settings().set_bool("/isaaclab/render/rtx_sensors", False) + + try: + # Parse environment config + print(" [1/6] Parsing environment config...") + env_cfg = parse_env_cfg(TEST_ENV, device=sim_device, num_envs=NUM_ENVS) + + # Create environment + print(" [2/6] Creating environment (may take 5-10s)...") + env = gym.make(TEST_ENV, cfg=env_cfg) + print(" [2/6] Environment created successfully") + except Exception as e: + # Try to close environment on exception + if "env" in locals() and hasattr(env, "_is_closed"): + env.close() + else: + if hasattr(e, "obj") and hasattr(e.obj, "_is_closed"): + e.obj.close() + pytest.fail(f"Failed to set-up the environment for task {TEST_ENV}. Error: {e}") + + # Disable control on stop + env.unwrapped.sim._app_control_on_stop_handle = None + + # Verify environment device + print(" [3/6] Verifying environment device...") + assert ( + env.unwrapped.device == sim_device + ), f"Environment device mismatch: expected {sim_device}, got {env.unwrapped.device}" + + # Test environment directly before wrapping to verify it returns data on sim device + print(" [3/6] Testing unwrapped environment returns data on sim_device...") + obs_dict, _ = env.reset() + for key, value in obs_dict.items(): + if isinstance(value, torch.Tensor): + assert ( + value.device.type == torch.device(sim_device).type + ), f"Unwrapped env obs '{key}' should be on {sim_device}, got {value.device}" + print(f" [3/6] Verified: Unwrapped environment returns data on {sim_device}") + + # Wrap with skrl (will use skrl.config.torch.device for policy) + print(" [3/6] Creating skrl wrapper...") + env = wrap_env(env, wrapper="isaaclab") + + # Reset to test basic functionality + print(" [4/6] Testing reset and step operations...") + obs, info = env.reset() + print(" [4/6] Reset completed") + + # Verify observations are tensors or dict + # skrl can return either dict or tensor depending on configuration + if isinstance(obs, dict): + assert isinstance(obs["policy"], torch.Tensor), f"Observations should be tensors, got {type(obs['policy'])}" + else: + assert isinstance(obs, torch.Tensor), f"Observations should be tensors, got {type(obs)}" + + # Sample random action on RL device (simulating policy output - skrl always uses GPU for training) + rl_device_obj = skrl.config.torch.device + action = 2 * torch.rand(NUM_ENVS, *env.action_space.shape, device=rl_device_obj) - 1 + print(f" [4/6] Action created on rl_device: {rl_device_obj}, shape: {action.shape}") + + # Verify action is on RL device before calling step + assert ( + action.device.type == 
rl_device_obj.type + ), f"Action should be on {rl_device_obj} before step, got {action.device}" + + # Step environment - wrapper should: + # 1. Accept action on rl_device + # 2. Transfer action from rl_device to sim_device internally + # 3. Call unwrapped env.step() with action on sim_device + # 4. Return outputs on sim_device (skrl policy handles device transfer) + print(" [5/6] Testing step with action on rl_device...") + transition = env.step(action) + print(" [5/6] Step completed - wrapper handled action device transfer") + + # Verify outputs are tensors + # Note: skrl wrapper returns outputs on sim_device, not rl_device + # The policy is responsible for transferring observations when needed + print(" [6/6] Verifying outputs are on sim_device (skrl behavior)...") + if len(transition) == 5: + obs, reward, terminated, truncated, info = transition + # Check observations (can be dict or tensor) + if isinstance(obs, dict): + assert isinstance(obs["policy"], torch.Tensor), "Observations should be tensors" + assert ( + obs["policy"].device.type == torch.device(sim_device).type + ), f"Observations should be on {sim_device}, got {obs['policy'].device}" + else: + assert isinstance(obs, torch.Tensor), "Observations should be tensors" + assert ( + obs.device.type == torch.device(sim_device).type + ), f"Observations should be on {sim_device}, got {obs.device}" + assert isinstance(reward, torch.Tensor), "Rewards should be tensors" + assert ( + reward.device.type == torch.device(sim_device).type + ), f"Rewards should be on {sim_device}, got {reward.device}" + assert isinstance(terminated, torch.Tensor), "Terminated should be tensors" + assert ( + terminated.device.type == torch.device(sim_device).type + ), f"Terminated should be on {sim_device}, got {terminated.device}" + assert isinstance(truncated, torch.Tensor), "Truncated should be tensors" + assert ( + truncated.device.type == torch.device(sim_device).type + ), f"Truncated should be on {sim_device}, got {truncated.device}" + elif len(transition) == 4: + obs, reward, done, info = transition + # Check observations (can be dict or tensor) + if isinstance(obs, dict): + assert isinstance(obs["policy"], torch.Tensor), "Observations should be tensors" + assert ( + obs["policy"].device.type == torch.device(sim_device).type + ), f"Observations should be on {sim_device}, got {obs['policy'].device}" + else: + assert isinstance(obs, torch.Tensor), "Observations should be tensors" + assert ( + obs.device.type == torch.device(sim_device).type + ), f"Observations should be on {sim_device}, got {obs.device}" + assert isinstance(reward, torch.Tensor), "Rewards should be tensors" + assert ( + reward.device.type == torch.device(sim_device).type + ), f"Rewards should be on {sim_device}, got {reward.device}" + assert isinstance(done, torch.Tensor), "Dones should be tensors" + assert done.device.type == torch.device(sim_device).type, f"Dones should be on {sim_device}, got {done.device}" + else: + pytest.fail(f"Unexpected number of return values from step: {len(transition)}") + + # Cleanup + print(" [6/6] Cleaning up environment...") + env.close() + print(f"✓ skrl test PASSED for sim_device={sim_device}, rl_device={rl_device_obj}") + print(" Wrapper device transfer verified (skrl-specific behavior):") + print(f" 1. Unwrapped env: expects actions on {sim_device}, returns data on {sim_device}") + print(f" 2. Wrapper: accepts actions on {rl_device_obj} (from policy)") + print(f" 3. Wrapper: internally transfers actions to {sim_device} for env.step()") + print(f" 4. 
Wrapper: returns outputs on {sim_device} (policy handles obs device transfer)") + print(" Note: Unlike RSL-RL/RL-Games, skrl keeps observations on sim_device") + print("-" * 80) + + +# ============================================================================ +# Test Functions +# ============================================================================ + + +def test_rsl_rl_device_separation_gpu_to_gpu(): + """Test RSL-RL with GPU simulation and GPU RL (default configuration).""" + try: + import isaaclab_rl.rsl_rl # noqa: F401 + except ImportError: + pytest.skip("RSL-RL not installed") + + _test_rsl_rl_device_separation(sim_device="cuda:0", rl_device="cuda:0") + + +def test_rsl_rl_device_separation_gpu_to_cpu(): + """Test RSL-RL with GPU simulation and CPU RL (cross-device transfer).""" + try: + import isaaclab_rl.rsl_rl # noqa: F401 + except ImportError: + pytest.skip("RSL-RL not installed") + + _test_rsl_rl_device_separation(sim_device="cuda:0", rl_device="cpu") + + +def test_rl_games_device_separation_gpu_to_gpu(): + """Test RL Games with GPU simulation and GPU RL (default configuration).""" + try: + import isaaclab_rl.rl_games # noqa: F401 + except ImportError: + pytest.skip("RL Games not installed") + + _test_rl_games_device_separation(sim_device="cuda:0", rl_device="cuda:0") + + +def test_rl_games_device_separation_gpu_to_cpu(): + """Test RL Games with GPU simulation and CPU RL (cross-device transfer).""" + try: + import isaaclab_rl.rl_games # noqa: F401 + except ImportError: + pytest.skip("RL Games not installed") + + _test_rl_games_device_separation(sim_device="cuda:0", rl_device="cpu") + + +def test_sb3_device_separation_gpu(): + """Test Stable-Baselines3 with GPU simulation. + + Note: SB3 always converts to CPU/numpy, so only GPU simulation is tested. + """ + try: + import isaaclab_rl.sb3 # noqa: F401 + except ImportError: + pytest.skip("Stable-Baselines3 not installed") + + _test_sb3_device_separation(sim_device="cuda:0") + + +def test_skrl_device_separation_gpu(): + """Test skrl with GPU simulation and GPU policy (matching devices).""" + try: + import skrl # noqa: F401 + except ImportError: + pytest.skip("skrl not installed") + + _test_skrl_device_separation(sim_device="cuda:0", rl_device="cuda:0") + + +def test_skrl_device_separation_cpu_to_gpu(): + """Test skrl with CPU simulation and GPU policy. + + Note: Uses skrl.config.torch.device to set the policy device to GPU + while the environment runs on CPU. 
+ """ + try: + import skrl # noqa: F401 + except ImportError: + pytest.skip("skrl not installed") + + _test_skrl_device_separation(sim_device="cpu", rl_device="cuda:0") From b40845ec9428df053e9fa552f3e90bacc6743f64 Mon Sep 17 00:00:00 2001 From: Kelly Guo Date: Mon, 10 Nov 2025 16:53:23 -0800 Subject: [PATCH 3/7] isolate pick_and_place change --- scripts/demos/pick_and_place.py | 98 +++++++++++++++------------------ 1 file changed, 43 insertions(+), 55 deletions(-) diff --git a/scripts/demos/pick_and_place.py b/scripts/demos/pick_and_place.py index 249059c61f4..bc6d35940f0 100644 --- a/scripts/demos/pick_and_place.py +++ b/scripts/demos/pick_and_place.py @@ -11,7 +11,6 @@ # add argparse arguments parser = argparse.ArgumentParser(description="Keyboard control for Isaac Lab Pick and Place.") -parser.add_argument("--num_envs", type=int, default=32, help="Number of environments to spawn.") # append AppLauncher cli args AppLauncher.add_app_launcher_args(parser) # parse the arguments @@ -60,16 +59,11 @@ class PickAndPlaceEnvCfg(DirectRLEnvCfg): action_space = 4 observation_space = 6 state_space = 0 + device = "cpu" - # Simulation cfg. Surface grippers are currently only supported on CPU. - # Surface grippers also require scene query support to function. - sim: SimulationCfg = SimulationCfg( - dt=1 / 60, - device="cpu", - render_interval=decimation, - use_fabric=True, - enable_scene_query_support=True, - ) + # Simulation cfg. Note that we are forcing the simulation to run on CPU. + # This is because the surface gripper API is only supported on CPU backend for now. + sim: SimulationCfg = SimulationCfg(dt=1 / 60, render_interval=decimation, device="cpu") debug_vis = True # robot @@ -142,8 +136,8 @@ def __init__(self, cfg: PickAndPlaceEnvCfg, render_mode: str | None = None, **kw self.joint_vel = self.pick_and_place.data.joint_vel # Buffers - self.go_to_cube = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) - self.go_to_target = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + self.go_to_cube = False + self.go_to_target = False self.target_pos = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float32) self.instant_controls = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float32) self.permanent_controls = torch.zeros((self.num_envs, 1), device=self.device, dtype=torch.float32) @@ -179,36 +173,35 @@ def set_up_keyboard(self): print("Keyboard set up!") print("The simulation is ready for you to try it out!") print("Your goal is pick up the purple cube and to drop it on the red sphere!") - print(f"Number of environments: {self.num_envs}") - print("Use the following controls to interact with ALL environments simultaneously:") - print("Press the 'A' key to have all grippers track the cube position.") - print("Press the 'D' key to have all grippers track the target position") - print("Press the 'W' or 'S' keys to move all gantries UP or DOWN respectively") - print("Press 'Q' or 'E' to OPEN or CLOSE all grippers respectively") + print("Use the following controls to interact with the simulation:") + print("Press the 'A' key to have the gripper track the cube position.") + print("Press the 'D' key to have the gripper track the target position") + print("Press the 'W' or 'S' keys to move the gantry UP or DOWN respectively") + print("Press 'Q' or 'E' to OPEN or CLOSE the gripper respectively") def _on_keyboard_event(self, event): """Checks for a keyboard event and assign the corresponding command control depending on key pressed.""" if 
event.type == carb.input.KeyboardEventType.KEY_PRESS: - # Logic on key press - apply to ALL environments + # Logic on key press if event.input.name == self._auto_aim_target: - self.go_to_target[:] = True - self.go_to_cube[:] = False + self.go_to_target = True + self.go_to_cube = False if event.input.name == self._auto_aim_cube: - self.go_to_cube[:] = True - self.go_to_target[:] = False + self.go_to_cube = True + self.go_to_target = False if event.input.name in self._instant_key_controls: - self.go_to_cube[:] = False - self.go_to_target[:] = False - self.instant_controls[:] = self._instant_key_controls[event.input.name] + self.go_to_cube = False + self.go_to_target = False + self.instant_controls[0] = self._instant_key_controls[event.input.name] if event.input.name in self._permanent_key_controls: - self.go_to_cube[:] = False - self.go_to_target[:] = False - self.permanent_controls[:] = self._permanent_key_controls[event.input.name] - # On key release, all robots stop moving + self.go_to_cube = False + self.go_to_target = False + self.permanent_controls[0] = self._permanent_key_controls[event.input.name] + # On key release, the robot stops moving elif event.type == carb.input.KeyboardEventType.KEY_RELEASE: - self.go_to_cube[:] = False - self.go_to_target[:] = False - self.instant_controls[:] = self._instant_key_controls["ZEROS"] + self.go_to_cube = False + self.go_to_target = False + self.instant_controls[0] = self._instant_key_controls["ZEROS"] def _setup_scene(self): self.pick_and_place = Articulation(self.cfg.robot_cfg) @@ -232,30 +225,28 @@ def _pre_physics_step(self, actions: torch.Tensor) -> None: def _apply_action(self) -> None: # We use the keyboard outputs as an action. - # Process each environment independently - if self.go_to_cube.any(): + if self.go_to_cube: # Effort based proportional controller to track the cube position - head_pos_x = self.pick_and_place.data.joint_pos[self.go_to_cube, self._x_dof_idx[0]] - head_pos_y = self.pick_and_place.data.joint_pos[self.go_to_cube, self._y_dof_idx[0]] - cube_pos_x = self.cube.data.root_pos_w[self.go_to_cube, 0] - self.scene.env_origins[self.go_to_cube, 0] - cube_pos_y = self.cube.data.root_pos_w[self.go_to_cube, 1] - self.scene.env_origins[self.go_to_cube, 1] + head_pos_x = self.pick_and_place.data.joint_pos[:, self._x_dof_idx[0]] + head_pos_y = self.pick_and_place.data.joint_pos[:, self._y_dof_idx[0]] + cube_pos_x = self.cube.data.root_pos_w[:, 0] - self.scene.env_origins[:, 0] + cube_pos_y = self.cube.data.root_pos_w[:, 1] - self.scene.env_origins[:, 1] d_cube_robot_x = cube_pos_x - head_pos_x d_cube_robot_y = cube_pos_y - head_pos_y - self.instant_controls[self.go_to_cube] = torch.stack( - [d_cube_robot_x * 5.0, d_cube_robot_y * 5.0, torch.zeros_like(d_cube_robot_x)], dim=1 + self.instant_controls[0] = torch.tensor( + [d_cube_robot_x * 5.0, d_cube_robot_y * 5.0, 0.0], device=self.device ) - elif self.go_to_target.any(): + elif self.go_to_target: # Effort based proportional controller to track the target position - head_pos_x = self.pick_and_place.data.joint_pos[self.go_to_target, self._x_dof_idx[0]] - head_pos_y = self.pick_and_place.data.joint_pos[self.go_to_target, self._y_dof_idx[0]] - target_pos_x = self.target_pos[self.go_to_target, 0] - target_pos_y = self.target_pos[self.go_to_target, 1] + head_pos_x = self.pick_and_place.data.joint_pos[:, self._x_dof_idx[0]] + head_pos_y = self.pick_and_place.data.joint_pos[:, self._y_dof_idx[0]] + target_pos_x = self.target_pos[:, 0] + target_pos_y = self.target_pos[:, 1] d_target_robot_x = 
target_pos_x - head_pos_x d_target_robot_y = target_pos_y - head_pos_y - self.instant_controls[self.go_to_target] = torch.stack( - [d_target_robot_x * 5.0, d_target_robot_y * 5.0, torch.zeros_like(d_target_robot_x)], dim=1 + self.instant_controls[0] = torch.tensor( + [d_target_robot_x * 5.0, d_target_robot_y * 5.0, 0.0], device=self.device ) - # Set the joint effort targets for the picker self.pick_and_place.set_joint_effort_target( self.instant_controls[:, 0].unsqueeze(dim=1), joint_ids=self._x_dof_idx @@ -267,7 +258,7 @@ def _apply_action(self) -> None: self.permanent_controls[:, 0].unsqueeze(dim=1), joint_ids=self._z_dof_idx ) # Set the gripper command - self.gripper.set_grippers_command(self.instant_controls[:, 2]) + self.gripper.set_grippers_command(self.instant_controls[:, 2].unsqueeze(dim=1)) def _get_observations(self) -> dict: # Get the observations @@ -406,11 +397,8 @@ def _debug_vis_callback(self, event): def main(): """Main function.""" - # create environment configuration - env_cfg = PickAndPlaceEnvCfg() - env_cfg.scene.num_envs = args_cli.num_envs # create environment - pick_and_place = PickAndPlaceEnv(env_cfg) + pick_and_place = PickAndPlaceEnv(PickAndPlaceEnvCfg()) obs, _ = pick_and_place.reset() while simulation_app.is_running(): # check for selected robots @@ -421,4 +409,4 @@ if __name__ == "__main__": main() - simulation_app.close() + simulation_app.close() \ No newline at end of file From c176374a0e6d4f15c9f0c37a412b123eb436d1f9 Mon Sep 17 00:00:00 2001 From: Kelly Guo Date: Mon, 10 Nov 2025 16:59:36 -0800 Subject: [PATCH 4/7] update --- source/isaaclab_rl/config/extension.toml | 2 +- source/isaaclab_rl/docs/CHANGELOG.rst | 8 ++++---- source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/source/isaaclab_rl/config/extension.toml b/source/isaaclab_rl/config/extension.toml index 35ce2649060..494f39f7456 100644 --- a/source/isaaclab_rl/config/extension.toml +++ b/source/isaaclab_rl/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.5.0" +version = "0.4.5" # Description title = "Isaac Lab RL" diff --git a/source/isaaclab_rl/docs/CHANGELOG.rst b/source/isaaclab_rl/docs/CHANGELOG.rst index 3698bf770e0..0305f5a99b1 100644 --- a/source/isaaclab_rl/docs/CHANGELOG.rst +++ b/source/isaaclab_rl/docs/CHANGELOG.rst @@ -1,13 +1,13 @@ Changelog --------- -0.5.0 (2025-11-10) +0.4.5 (2025-11-10) ~~~~~~~~~~~~~~~~~~ -Added -^^^^^ +Changed +^^^^^^^ -* Added support for decoupling RL device from simulation device for the RL wrappers. +* Added support for decoupling RL device from simulation device for the RL Games wrapper. This allows users to run simulation on one device (e.g., CPU) while running RL training/inference on another device. 
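For reference, the decoupled-device workflow described by the changelog entry above can be exercised through the RL Games wrapper's rl_device argument. The sketch below is a minimal illustration that mirrors the calls used in the test added later in this series; the task name, environment count, and clipping values are placeholders, and launching the simulation app beforehand (as the test does via AppLauncher) is omitted for brevity.

import gymnasium as gym
import torch

from isaaclab_rl.rl_games import RlGamesVecEnvWrapper
from isaaclab_tasks.utils.parse_cfg import parse_env_cfg

# Simulation runs on CPU while the RL policy lives on the GPU.
env_cfg = parse_env_cfg("Isaac-Cartpole-v0", device="cpu", num_envs=4)
env = gym.make("Isaac-Cartpole-v0", cfg=env_cfg)
env = RlGamesVecEnvWrapper(env, rl_device="cuda:0", clip_obs=10.0, clip_actions=1.0)

obs = env.reset()  # observations are returned on cuda:0
action = 2 * torch.rand(4, *env.action_space.shape, device="cuda:0") - 1
obs, reward, dones, info = env.step(action)  # wrapper moves the action to the CPU sim

Observations and rewards come back on the RL device, so the policy never needs to know the simulation is on CPU.
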
diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py index 784892f7e37..6561ace8c93 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py @@ -213,4 +213,4 @@ def _modify_action_space(self): ) self.env.unwrapped.action_space = gym.vector.utils.batch_space( self.env.unwrapped.single_action_space, self.num_envs - ) + ) \ No newline at end of file From cd1764cf231681fb11ecda782f23c20a940c1678 Mon Sep 17 00:00:00 2001 From: Kelly Guo Date: Mon, 10 Nov 2025 17:04:43 -0800 Subject: [PATCH 5/7] format --- scripts/demos/pick_and_place.py | 2 +- .../reinforcement_learning/rsl_rl/train.py | 2 +- .../isaaclab_rl/rsl_rl/vecenv_wrapper.py | 35 ++----------------- 3 files changed, 5 insertions(+), 34 deletions(-) diff --git a/scripts/demos/pick_and_place.py b/scripts/demos/pick_and_place.py index bc6d35940f0..cc14dcb0a72 100644 --- a/scripts/demos/pick_and_place.py +++ b/scripts/demos/pick_and_place.py @@ -409,4 +409,4 @@ def main(): if __name__ == "__main__": main() - simulation_app.close() \ No newline at end of file + simulation_app.close() diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py index ad739f4559a..8b66feb28aa 100644 --- a/scripts/reinforcement_learning/rsl_rl/train.py +++ b/scripts/reinforcement_learning/rsl_rl/train.py @@ -182,7 +182,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen env = gym.wrappers.RecordVideo(env, **video_kwargs) # wrap around environment for rsl-rl - env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions, rl_device=agent_cfg.device) + env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) # create runner from rsl-rl if agent_cfg.class_name == "OnPolicyRunner": diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py index 6561ace8c93..73ceae04693 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py @@ -24,9 +24,7 @@ class RslRlVecEnvWrapper(VecEnv): https://github.com/leggedrobotics/rsl_rl/blob/master/rsl_rl/env/vec_env.py """ - def __init__( - self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | None = None, rl_device: str | None = None - ): + def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | None = None): """Initializes the wrapper. Note: @@ -35,8 +33,6 @@ def __init__( Args: env: The environment to wrap around. clip_actions: The clipping value for actions. If ``None``, then no clipping is done. - rl_device: The device for RL agent/policy. If ``None``, uses the environment device. - This allows running the RL agent on a different device than the environment. Raises: ValueError: When the environment is not an instance of :class:`ManagerBasedRLEnv` or :class:`DirectRLEnv`. 
@@ -53,21 +49,11 @@ def __init__( self.env = env self.clip_actions = clip_actions - # store the RL device (where policy/training happens) - # this may be different from env.device (where task buffers are) - if rl_device is None: - self.rl_device = self.unwrapped.device - else: - self.rl_device = rl_device - # store information required by wrapper self.num_envs = self.unwrapped.num_envs - self.device = self.rl_device + self.device = self.unwrapped.device self.max_episode_length = self.unwrapped.max_episode_length - # track the environment device separately - self.env_device = self.unwrapped.device - # obtain dimensions of the environment if hasattr(self.unwrapped, "action_manager"): self.num_actions = self.unwrapped.action_manager.total_action_dim @@ -153,9 +139,6 @@ def seed(self, seed: int = -1) -> int: # noqa: D102 def reset(self) -> tuple[TensorDict, dict]: # noqa: D102 # reset the environment obs_dict, extras = self.env.reset() - # move observations to RL device if different from env device - if self.rl_device != self.env_device: - obs_dict = {k: v.to(self.rl_device) if isinstance(v, torch.Tensor) else v for k, v in obs_dict.items()} return TensorDict(obs_dict, batch_size=[self.num_envs]), extras def get_observations(self) -> TensorDict: @@ -164,26 +147,14 @@ def get_observations(self) -> TensorDict: obs_dict = self.unwrapped.observation_manager.compute() else: obs_dict = self.unwrapped._get_observations() - # move observations to RL device if different from env device - if self.rl_device != self.env_device: - obs_dict = {k: v.to(self.rl_device) if isinstance(v, torch.Tensor) else v for k, v in obs_dict.items()} return TensorDict(obs_dict, batch_size=[self.num_envs]) def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.Tensor, dict]: - # move actions to env device if coming from different RL device - if self.rl_device != self.env_device: - actions = actions.to(self.env_device) # clip actions if self.clip_actions is not None: actions = torch.clamp(actions, -self.clip_actions, self.clip_actions) # record step information obs_dict, rew, terminated, truncated, extras = self.env.step(actions) - # move outputs to RL device if different from env device - if self.rl_device != self.env_device: - obs_dict = {k: v.to(self.rl_device) if isinstance(v, torch.Tensor) else v for k, v in obs_dict.items()} - rew = rew.to(self.rl_device) - terminated = terminated.to(self.rl_device) - truncated = truncated.to(self.rl_device) # compute dones for compatibility with RSL-RL dones = (terminated | truncated).to(dtype=torch.long) # move time out information to the extras dict @@ -213,4 +184,4 @@ def _modify_action_space(self): ) self.env.unwrapped.action_space = gym.vector.utils.batch_space( self.env.unwrapped.single_action_space, self.num_envs - ) \ No newline at end of file + ) From 422f8e182da304754d58bcf303a56e967f7d532c Mon Sep 17 00:00:00 2001 From: Kelly Guo Date: Mon, 10 Nov 2025 19:41:57 -0800 Subject: [PATCH 6/7] fix test --- .../test/test_rl_device_separation.py | 545 ++++-------------- 1 file changed, 118 insertions(+), 427 deletions(-) diff --git a/source/isaaclab_tasks/test/test_rl_device_separation.py b/source/isaaclab_tasks/test/test_rl_device_separation.py index ec3f7060877..2faeabbe1f0 100644 --- a/source/isaaclab_tasks/test/test_rl_device_separation.py +++ b/source/isaaclab_tasks/test/test_rl_device_separation.py @@ -24,22 +24,15 @@ 4. 
Wrapper: transfers outputs from sim_device → rl_device (for policy to use) Tested Libraries: - - RSL-RL: TensorDict observations, explicit rl_device parameter - * Transfers observations and rewards to rl_device - - RL Games: Dict observations, explicit rl_device parameter - * Transfers observations and rewards to rl_device + - RSL-RL: TensorDict observations, device separation via OnPolicyRunner (agent_cfg.device) + * Wrapper returns data on sim_device, Runner handles transfers to rl_device + - RL Games: Dict observations, explicit rl_device parameter in wrapper + * Wrapper transfers data from sim_device to rl_device - Stable-Baselines3: Numpy arrays (CPU-only by design) - * Always converts to/from numpy on CPU + * Wrapper converts tensors to/from numpy on CPU - skrl: Dict observations, uses skrl.config.torch.device for RL device - * Keeps observations on sim_device (policy handles transfer) - * Only transfers actions from rl_device to sim_device - -IMPORTANT: Due to Isaac Sim limitations, only ONE test can be run per pytest invocation. -Run tests individually: - pytest test_rl_device_separation.py::test_rsl_rl_device_separation_gpu_to_gpu -v -s - pytest test_rl_device_separation.py::test_rsl_rl_device_separation_gpu_to_cpu -v -s - pytest test_rl_device_separation.py::test_rl_games_device_separation_gpu_to_gpu -v -s - ... + * Wrapper keeps observations on sim_device, only transfers actions + """ from isaaclab.app import AppLauncher @@ -65,35 +58,23 @@ NUM_ENVS = 4 -def _test_rsl_rl_device_separation(sim_device: str, rl_device: str): - """Helper function to test RSL-RL with specified device configuration. +def _create_env(sim_device: str): + """Create and initialize a test environment. Args: sim_device: Device for simulation (e.g., "cuda:0", "cpu") - rl_device: Device for RL agent (e.g., "cuda:0", "cpu") - """ - from tensordict import TensorDict - - from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper - - print(f"\n{'=' * 60}") - print(f">>> Testing RSL-RL with sim_device={sim_device}, rl_device={rl_device}") - print(f"{'=' * 60}") + Returns: + Initialized gym environment + """ # Create a new stage omni.usd.get_context().new_stage() # Reset the rtx sensors carb setting to False carb.settings.get_settings().set_bool("/isaaclab/render/rtx_sensors", False) try: - # Parse environment config - print(" [1/6] Parsing environment config...") env_cfg = parse_env_cfg(TEST_ENV, device=sim_device, num_envs=NUM_ENVS) - - # Create environment - print(" [2/6] Creating environment (may take 5-10s)...") env = gym.make(TEST_ENV, cfg=env_cfg) - print(" [2/6] Environment created successfully") except Exception as e: # Try to close environment on exception if "env" in locals() and hasattr(env, "_is_closed"): @@ -105,96 +86,92 @@ def _test_rsl_rl_device_separation(sim_device: str, rl_device: str): # Disable control on stop env.unwrapped.sim._app_control_on_stop_handle = None + return env + + +def _verify_unwrapped_env(env, sim_device: str): + """Verify unwrapped environment operates entirely on sim_device. 
- # Verify environment device - print(" [3/6] Verifying environment device...") - assert ( - env.unwrapped.device == sim_device - ), f"Environment device mismatch: expected {sim_device}, got {env.unwrapped.device}" + Args: + env: Unwrapped gym environment + sim_device: Expected simulation device + """ + assert env.unwrapped.device == sim_device, \ + f"Environment device mismatch: expected {sim_device}, got {env.unwrapped.device}" - # Test environment directly before wrapping to verify it returns data on sim device - print(" [3/6] Testing unwrapped environment returns data on sim_device...") + # Verify reset returns data on sim device obs_dict, _ = env.reset() for key, value in obs_dict.items(): if isinstance(value, torch.Tensor): - assert ( - value.device.type == torch.device(sim_device).type - ), f"Unwrapped env obs '{key}' should be on {sim_device}, got {value.device}" + assert value.device.type == torch.device(sim_device).type, \ + f"Unwrapped env obs '{key}' should be on {sim_device}, got {value.device}" - # Step unwrapped environment to verify outputs are on sim device + # Verify step returns data on sim device action_space = env.unwrapped.single_action_space test_action = torch.zeros(NUM_ENVS, action_space.shape[0], device=sim_device) obs_dict, rew, term, trunc, extras = env.step(test_action) - assert ( - rew.device.type == torch.device(sim_device).type - ), f"Unwrapped env rewards should be on {sim_device}, got {rew.device}" - assert ( - term.device.type == torch.device(sim_device).type - ), f"Unwrapped env terminated should be on {sim_device}, got {term.device}" - print(f" [3/6] Verified: Unwrapped environment returns data on {sim_device}") - - # Create RSL-RL wrapper with RL device - print(" [4/6] Creating RSL-RL wrapper...") - env = RslRlVecEnvWrapper(env, rl_device=rl_device) - print(f" [4/6] Wrapper created (env_device={env.env_device}, rl_device={env.rl_device})") - - # Verify devices - assert env.env_device == sim_device, f"Wrapper env_device should be {sim_device}" - assert env.rl_device == rl_device, f"Wrapper RL device should be {rl_device}" - assert env.device == rl_device, f"Wrapper device property should be {rl_device}" - - # Reset and step to test device transfers - print(" [5/6] Testing reset and step operations...") - obs, extras = env.reset() - print(" [5/6] Reset completed") + assert rew.device.type == torch.device(sim_device).type, \ + f"Unwrapped env rewards should be on {sim_device}, got {rew.device}" + assert term.device.type == torch.device(sim_device).type, \ + f"Unwrapped env terminated should be on {sim_device}, got {term.device}" + + +def _verify_tensor_device(data, expected_device: str, name: str): + """Verify tensor or dict of tensors is on expected device. + + Args: + data: Tensor, dict of tensors, or numpy array + expected_device: Expected device string + name: Name for error messages + """ + if isinstance(data, torch.Tensor): + assert data.device.type == torch.device(expected_device).type, \ + f"{name} should be on {expected_device}, got {data.device}" + elif isinstance(data, dict): + for key, value in data.items(): + if isinstance(value, torch.Tensor): + assert value.device.type == torch.device(expected_device).type, \ + f"{name}['{key}'] should be on {expected_device}, got {value.device}" + + +def _test_rsl_rl_device_separation(sim_device: str, rl_device: str): + """Helper function to test RSL-RL with specified device configuration. + + Note: RSL-RL device separation is handled by the OnPolicyRunner, not the wrapper. 
+ The wrapper returns observations on sim_device, and the runner handles device transfers. + This test verifies the wrapper works correctly when actions come from a different device. + + Args: + sim_device: Device for simulation (e.g., "cuda:0", "cpu") + rl_device: Device for RL agent (e.g., "cuda:0", "cpu") - where policy generates actions + """ + from tensordict import TensorDict + from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper + + env = _create_env(sim_device) + _verify_unwrapped_env(env, sim_device) - # Verify observations are on RL device (RSL-RL returns TensorDict) + # Create wrapper - it uses sim_device, runner handles rl_device + env = RslRlVecEnvWrapper(env) + assert env.device == sim_device, f"Wrapper device should be {sim_device}" + + # Test reset - wrapper returns observations on sim_device + obs, extras = env.reset() assert isinstance(obs, TensorDict), f"Expected TensorDict, got {type(obs)}" - for key, value in obs.items(): - if isinstance(value, torch.Tensor): - assert ( - value.device.type == torch.device(rl_device).type - ), f"Observation '{key}' should be on {rl_device}, got {value.device}" + _verify_tensor_device(obs, sim_device, "Observation") - # Sample random action on RL device (simulating policy output) - # RSL-RL: action_space.shape is (num_envs, action_dim) + # Test step with action from RL device (simulating policy output) + # The wrapper should handle transferring action to sim_device internally action = 2 * torch.rand(env.action_space.shape, device=rl_device) - 1 - print(f" [5/6] Action created on rl_device: {action.device}, shape: {action.shape}") - - # Verify action is on RL device before calling step - assert ( - action.device.type == torch.device(rl_device).type - ), f"Action should be on {rl_device} before step, got {action.device}" - - # Step environment - wrapper should: - # 1. Accept action on rl_device - # 2. Transfer action from rl_device to sim_device internally - # 3. Call unwrapped env.step() with action on sim_device - # 4. Transfer outputs from sim_device to rl_device obs, reward, dones, extras = env.step(action) - print(" [5/6] Step completed - wrapper handled device transfers") - # Verify all outputs are on RL device (wrapper transferred from sim_device) - print(" [6/6] Verifying device transfers...") + # Verify outputs are on sim_device (runner would transfer to rl_device) assert isinstance(obs, TensorDict), f"Expected TensorDict, got {type(obs)}" - for key, value in obs.items(): - if isinstance(value, torch.Tensor): - assert ( - value.device.type == torch.device(rl_device).type - ), f"Step observation '{key}' should be on {rl_device}, got {value.device}" - assert reward.device.type == torch.device(rl_device).type, f"Rewards should be on {rl_device}, got {reward.device}" - assert dones.device.type == torch.device(rl_device).type, f"Dones should be on {rl_device}, got {dones.device}" - - # Cleanup - print(" [6/6] Cleaning up environment...") + _verify_tensor_device(obs, sim_device, "Step observation") + _verify_tensor_device(reward, sim_device, "Reward") + _verify_tensor_device(dones, sim_device, "Dones") + env.close() - print(f"✓ RSL-RL test PASSED for sim_device={sim_device}, rl_device={rl_device}") - print(" Wrapper device transfer verified:") - print(f" 1. Unwrapped env: expects actions on {sim_device}, returns data on {sim_device}") - print(f" 2. Wrapper: accepts actions on {rl_device} (from policy)") - print(f" 3. Wrapper: internally transfers actions to {sim_device} for env.step()") - print(f" 4. 
Wrapper: transfers outputs from {sim_device} to {rl_device} (for policy)") - print("-" * 80) def _test_rl_games_device_separation(sim_device: str, rl_device: str): @@ -206,125 +183,26 @@ def _test_rl_games_device_separation(sim_device: str, rl_device: str): """ from isaaclab_rl.rl_games import RlGamesVecEnvWrapper - print(f"\n{'=' * 60}") - print(f">>> Testing RL Games with sim_device={sim_device}, rl_device={rl_device}") - print(f"{'=' * 60}") + env = _create_env(sim_device) + _verify_unwrapped_env(env, sim_device) - # Create a new stage - omni.usd.get_context().new_stage() - # Reset the rtx sensors carb setting to False - carb.settings.get_settings().set_bool("/isaaclab/render/rtx_sensors", False) - - try: - # Parse environment config - print(" [1/5] Parsing environment config...") - env_cfg = parse_env_cfg(TEST_ENV, device=sim_device, num_envs=NUM_ENVS) - - # Create environment - print(" [2/5] Creating environment (may take 5-10s)...") - env = gym.make(TEST_ENV, cfg=env_cfg) - print(" [2/5] Environment created successfully") - except Exception as e: - # Try to close environment on exception - if "env" in locals() and hasattr(env, "_is_closed"): - env.close() - else: - if hasattr(e, "obj") and hasattr(e.obj, "_is_closed"): - e.obj.close() - pytest.fail(f"Failed to set-up the environment for task {TEST_ENV}. Error: {e}") - - # Disable control on stop - env.unwrapped.sim._app_control_on_stop_handle = None - - # Verify environment device - print(" [3/5] Verifying environment device...") - assert ( - env.unwrapped.device == sim_device - ), f"Environment device mismatch: expected {sim_device}, got {env.unwrapped.device}" - - # Test environment directly before wrapping to verify it returns data on sim device - print(" [3/5] Testing unwrapped environment returns data on sim_device...") - obs_dict, _ = env.reset() - for key, value in obs_dict.items(): - if isinstance(value, torch.Tensor): - assert ( - value.device.type == torch.device(sim_device).type - ), f"Unwrapped env obs '{key}' should be on {sim_device}, got {value.device}" - - # Step unwrapped environment to verify outputs are on sim device - action_space = env.unwrapped.single_action_space - test_action = torch.zeros(NUM_ENVS, action_space.shape[0], device=sim_device) - obs_dict, rew, term, trunc, extras = env.step(test_action) - assert ( - rew.device.type == torch.device(sim_device).type - ), f"Unwrapped env rewards should be on {sim_device}, got {rew.device}" - assert ( - term.device.type == torch.device(sim_device).type - ), f"Unwrapped env terminated should be on {sim_device}, got {term.device}" - print(f" [3/5] Verified: Unwrapped environment returns data on {sim_device}") - - # Create RL Games wrapper with RL device - print(" [3/5] Creating RL Games wrapper...") + # Create wrapper env = RlGamesVecEnvWrapper(env, rl_device=rl_device, clip_obs=10.0, clip_actions=1.0) - # Reset and step to test device transfers - print(" [4/5] Testing reset and step operations...") + # Test reset obs = env.reset() - print(" [4/5] Reset completed") - - # Verify observations are on RL device - if isinstance(obs, dict): - for key, value in obs.items(): - assert ( - value.device.type == torch.device(rl_device).type - ), f"Observation '{key}' should be on {rl_device}, got {value.device}" - else: - assert ( - obs.device.type == torch.device(rl_device).type - ), f"Observation should be on {rl_device}, got {obs.device}" + _verify_tensor_device(obs, rl_device, "Observation") - # Sample random action on RL device (simulating policy output) + # Test step with 
action on RL device action = 2 * torch.rand(NUM_ENVS, *env.action_space.shape, device=rl_device) - 1 - print(f" [4/5] Action created on rl_device: {action.device}, shape: {action.shape}") - - # Verify action is on RL device before calling step - assert ( - action.device.type == torch.device(rl_device).type - ), f"Action should be on {rl_device} before step, got {action.device}" - - # Step environment - wrapper should: - # 1. Accept action on rl_device - # 2. Transfer action from rl_device to sim_device internally - # 3. Call unwrapped env.step() with action on sim_device - # 4. Transfer outputs from sim_device to rl_device obs, reward, dones, info = env.step(action) - print(" [4/5] Step completed - wrapper handled device transfers") - - # Verify all outputs are on RL device (wrapper transferred from sim_device) - print(" [5/5] Verifying device transfers...") - # RL Games returns flat tensor for observations - if isinstance(obs, dict): - for key, value in obs.items(): - assert ( - value.device.type == torch.device(rl_device).type - ), f"Observation '{key}' should be on {rl_device}, got {value.device}" - else: - assert ( - obs.device.type == torch.device(rl_device).type - ), f"Observations should be on {rl_device}, got {obs.device}" - assert reward.device.type == torch.device(rl_device).type, f"Rewards should be on {rl_device}, got {reward.device}" - assert dones.device.type == torch.device(rl_device).type, f"Dones should be on {rl_device}, got {dones.device}" - - # Cleanup - print(" [5/5] Cleaning up environment...") + + # Verify outputs are on RL device + _verify_tensor_device(obs, rl_device, "Observation") + _verify_tensor_device(reward, rl_device, "Reward") + _verify_tensor_device(dones, rl_device, "Dones") + env.close() - print(f"✓ RL Games test PASSED for sim_device={sim_device}, rl_device={rl_device}") - print(" Wrapper device transfer verified:") - print(f" 1. Unwrapped env: expects actions on {sim_device}, returns data on {sim_device}") - print(f" 2. Wrapper: accepts actions on {rl_device} (from policy)") - print(f" 3. Wrapper: internally transfers actions to {sim_device} for env.step()") - print(f" 4. Wrapper: transfers outputs from {sim_device} to {rl_device} (for policy)") - print("-" * 80) def _test_sb3_device_separation(sim_device: str): @@ -336,106 +214,39 @@ def _test_sb3_device_separation(sim_device: str): sim_device: Device for simulation (e.g., "cuda:0", "cpu") """ import numpy as np - from isaaclab_rl.sb3 import Sb3VecEnvWrapper - print(f"\n{'=' * 60}") - print(f">>> Testing SB3 with sim_device={sim_device}") - print(f"{'=' * 60}") + env = _create_env(sim_device) + _verify_unwrapped_env(env, sim_device) - # Create a new stage - omni.usd.get_context().new_stage() - # Reset the rtx sensors carb setting to False - carb.settings.get_settings().set_bool("/isaaclab/render/rtx_sensors", False) - - try: - # Parse environment config - print(" [1/5] Parsing environment config...") - env_cfg = parse_env_cfg(TEST_ENV, device=sim_device, num_envs=NUM_ENVS) - - # Create environment - print(" [2/5] Creating environment (may take 5-10s)...") - env = gym.make(TEST_ENV, cfg=env_cfg) - print(" [2/5] Environment created successfully") - except Exception as e: - # Try to close environment on exception - if "env" in locals() and hasattr(env, "_is_closed"): - env.close() - else: - if hasattr(e, "obj") and hasattr(e.obj, "_is_closed"): - e.obj.close() - pytest.fail(f"Failed to set-up the environment for task {TEST_ENV}. 
Error: {e}") - - # Disable control on stop - env.unwrapped.sim._app_control_on_stop_handle = None - - # Verify environment device - print(" [3/5] Verifying environment device...") - assert ( - env.unwrapped.device == sim_device - ), f"Environment device mismatch: expected {sim_device}, got {env.unwrapped.device}" - - # Test environment directly before wrapping to verify it returns data on sim device - print(" [3/5] Testing unwrapped environment returns data on sim_device...") - obs_dict, _ = env.reset() - for key, value in obs_dict.items(): - if isinstance(value, torch.Tensor): - assert ( - value.device.type == torch.device(sim_device).type - ), f"Unwrapped env obs '{key}' should be on {sim_device}, got {value.device}" - print(f" [3/5] Verified: Unwrapped environment returns data on {sim_device}") - - # Create SB3 wrapper (always converts to numpy/CPU) - print(" [3/5] Creating SB3 wrapper...") + # Create wrapper env = Sb3VecEnvWrapper(env) - # Reset and step to test device transfers - print(" [4/5] Testing reset and step operations...") + # Test reset - SB3 should return numpy arrays obs = env.reset() - print(" [4/5] Reset completed") - - # SB3 observations should always be numpy arrays (on CPU) assert isinstance(obs, np.ndarray), f"SB3 observations should be numpy arrays, got {type(obs)}" - # Sample random action (SB3 uses numpy) + # Test step with numpy action action = 2 * np.random.rand(env.num_envs, *env.action_space.shape) - 1 - assert isinstance(action, np.ndarray), f"Action should be numpy array, got {type(action)}" - print(f" [4/5] Action sampled (numpy array), shape: {action.shape}") - - # Step environment - wrapper should: - # 1. Convert numpy action to torch tensor on sim_device internally - # 2. Call unwrapped env.step() with action on sim_device - # 3. Convert outputs from sim_device tensors to numpy arrays obs, reward, done, info = env.step(action) - print(" [4/5] Step completed, outputs converted to numpy") - # Verify all outputs are numpy arrays (wrapper transferred and converted) - print(" [5/5] Verifying numpy conversions...") + # Verify outputs are numpy arrays assert isinstance(obs, np.ndarray), f"Observations should be numpy arrays, got {type(obs)}" assert isinstance(reward, np.ndarray), f"Rewards should be numpy arrays, got {type(reward)}" assert isinstance(done, np.ndarray), f"Dones should be numpy arrays, got {type(done)}" - # Cleanup - print(" [5/5] Cleaning up environment...") env.close() - print(f"✓ SB3 test PASSED for sim_device={sim_device}") - print(" Wrapper device transfer verified:") - print(f" 1. Unwrapped env: expects actions on {sim_device}, returns data on {sim_device}") - print(" 2. Wrapper: accepts numpy arrays (from policy on CPU)") - print(f" 3. Wrapper: internally converts to tensors on {sim_device} for env.step()") - print(f" 4. Wrapper: converts outputs from {sim_device} tensors to numpy arrays (for policy)") - print("-" * 80) def _test_skrl_device_separation(sim_device: str, rl_device: str): """Helper function to test skrl with specified device configuration. Note: skrl uses skrl.config.torch.device for device configuration. - This can be set via agent_cfg["device"] for consistency with other libraries. + Observations remain on sim_device; only actions are transferred from rl_device. 
Args: sim_device: Device for simulation (e.g., "cuda:0", "cpu") - rl_device: Device for RL agent (e.g., "cuda:0", "cpu") - set via skrl.config.torch.device + rl_device: Device for RL agent (e.g., "cuda:0", "cpu") """ try: import skrl @@ -443,159 +254,39 @@ def _test_skrl_device_separation(sim_device: str, rl_device: str): except ImportError: pytest.skip("skrl not installed") - print(f"\n{'=' * 60}") - print(f">>> Testing skrl with sim_device={sim_device}, rl_device={rl_device}") - print(f" Using skrl.config.torch.device = {rl_device}") - print(f"{'=' * 60}") - - # Create agent config with device parameter (for demonstration/consistency) - agent_cfg = {"device": rl_device} - - # Configure skrl device (can be set from agent_cfg for consistency with other libraries) - if "device" in agent_cfg: - skrl.config.torch.device = torch.device(agent_cfg["device"]) - else: - skrl.config.torch.device = torch.device(rl_device) + # Configure skrl device + skrl.config.torch.device = torch.device(rl_device) - # Create a new stage - omni.usd.get_context().new_stage() - # Reset the rtx sensors carb setting to False - carb.settings.get_settings().set_bool("/isaaclab/render/rtx_sensors", False) + env = _create_env(sim_device) + _verify_unwrapped_env(env, sim_device) - try: - # Parse environment config - print(" [1/6] Parsing environment config...") - env_cfg = parse_env_cfg(TEST_ENV, device=sim_device, num_envs=NUM_ENVS) - - # Create environment - print(" [2/6] Creating environment (may take 5-10s)...") - env = gym.make(TEST_ENV, cfg=env_cfg) - print(" [2/6] Environment created successfully") - except Exception as e: - # Try to close environment on exception - if "env" in locals() and hasattr(env, "_is_closed"): - env.close() - else: - if hasattr(e, "obj") and hasattr(e.obj, "_is_closed"): - e.obj.close() - pytest.fail(f"Failed to set-up the environment for task {TEST_ENV}. 
Error: {e}") - - # Disable control on stop - env.unwrapped.sim._app_control_on_stop_handle = None - - # Verify environment device - print(" [3/6] Verifying environment device...") - assert ( - env.unwrapped.device == sim_device - ), f"Environment device mismatch: expected {sim_device}, got {env.unwrapped.device}" - - # Test environment directly before wrapping to verify it returns data on sim device - print(" [3/6] Testing unwrapped environment returns data on sim_device...") - obs_dict, _ = env.reset() - for key, value in obs_dict.items(): - if isinstance(value, torch.Tensor): - assert ( - value.device.type == torch.device(sim_device).type - ), f"Unwrapped env obs '{key}' should be on {sim_device}, got {value.device}" - print(f" [3/6] Verified: Unwrapped environment returns data on {sim_device}") - - # Wrap with skrl (will use skrl.config.torch.device for policy) - print(" [3/6] Creating skrl wrapper...") + # Wrap with skrl env = wrap_env(env, wrapper="isaaclab") - # Reset to test basic functionality - print(" [4/6] Testing reset and step operations...") + # Test reset obs, info = env.reset() - print(" [4/6] Reset completed") + assert isinstance(obs, (dict, torch.Tensor)), f"Observations should be dict or tensor, got {type(obs)}" - # Verify observations are tensors or dict - # skrl can return either dict or tensor depending on configuration - if isinstance(obs, dict): - assert isinstance(obs["policy"], torch.Tensor), f"Observations should be tensors, got {type(obs['policy'])}" - else: - assert isinstance(obs, torch.Tensor), f"Observations should be tensors, got {type(obs)}" - - # Sample random action on RL device (simulating policy output - skrl always uses GPU for training) - rl_device_obj = skrl.config.torch.device - action = 2 * torch.rand(NUM_ENVS, *env.action_space.shape, device=rl_device_obj) - 1 - print(f" [4/6] Action created on rl_device: {rl_device_obj}, shape: {action.shape}") - - # Verify action is on RL device before calling step - assert ( - action.device.type == rl_device_obj.type - ), f"Action should be on {rl_device_obj} before step, got {action.device}" - - # Step environment - wrapper should: - # 1. Accept action on rl_device - # 2. Transfer action from rl_device to sim_device internally - # 3. Call unwrapped env.step() with action on sim_device - # 4. 
Return outputs on sim_device (skrl policy handles device transfer) - print(" [5/6] Testing step with action on rl_device...") + # Test step with action on RL device + action = 2 * torch.rand(NUM_ENVS, *env.action_space.shape, device=skrl.config.torch.device) - 1 transition = env.step(action) - print(" [5/6] Step completed - wrapper handled action device transfer") - # Verify outputs are tensors - # Note: skrl wrapper returns outputs on sim_device, not rl_device - # The policy is responsible for transferring observations when needed - print(" [6/6] Verifying outputs are on sim_device (skrl behavior)...") + # Verify outputs - skrl keeps them on sim_device if len(transition) == 5: obs, reward, terminated, truncated, info = transition - # Check observations (can be dict or tensor) - if isinstance(obs, dict): - assert isinstance(obs["policy"], torch.Tensor), "Observations should be tensors" - assert ( - obs["policy"].device.type == torch.device(sim_device).type - ), f"Observations should be on {sim_device}, got {obs['policy'].device}" - else: - assert isinstance(obs, torch.Tensor), "Observations should be tensors" - assert ( - obs.device.type == torch.device(sim_device).type - ), f"Observations should be on {sim_device}, got {obs.device}" - assert isinstance(reward, torch.Tensor), "Rewards should be tensors" - assert ( - reward.device.type == torch.device(sim_device).type - ), f"Rewards should be on {sim_device}, got {reward.device}" - assert isinstance(terminated, torch.Tensor), "Terminated should be tensors" - assert ( - terminated.device.type == torch.device(sim_device).type - ), f"Terminated should be on {sim_device}, got {terminated.device}" - assert isinstance(truncated, torch.Tensor), "Truncated should be tensors" - assert ( - truncated.device.type == torch.device(sim_device).type - ), f"Truncated should be on {sim_device}, got {truncated.device}" + _verify_tensor_device(obs, sim_device, "Observation") + _verify_tensor_device(reward, sim_device, "Reward") + _verify_tensor_device(terminated, sim_device, "Terminated") + _verify_tensor_device(truncated, sim_device, "Truncated") elif len(transition) == 4: obs, reward, done, info = transition - # Check observations (can be dict or tensor) - if isinstance(obs, dict): - assert isinstance(obs["policy"], torch.Tensor), "Observations should be tensors" - assert ( - obs["policy"].device.type == torch.device(sim_device).type - ), f"Observations should be on {sim_device}, got {obs['policy'].device}" - else: - assert isinstance(obs, torch.Tensor), "Observations should be tensors" - assert ( - obs.device.type == torch.device(sim_device).type - ), f"Observations should be on {sim_device}, got {obs.device}" - assert isinstance(reward, torch.Tensor), "Rewards should be tensors" - assert ( - reward.device.type == torch.device(sim_device).type - ), f"Rewards should be on {sim_device}, got {reward.device}" - assert isinstance(done, torch.Tensor), "Dones should be tensors" - assert done.device.type == torch.device(sim_device).type, f"Dones should be on {sim_device}, got {done.device}" + _verify_tensor_device(obs, sim_device, "Observation") + _verify_tensor_device(reward, sim_device, "Reward") + _verify_tensor_device(done, sim_device, "Done") else: pytest.fail(f"Unexpected number of return values from step: {len(transition)}") - # Cleanup - print(" [6/6] Cleaning up environment...") env.close() - print(f"✓ skrl test PASSED for sim_device={sim_device}, rl_device={rl_device_obj}") - print(" Wrapper device transfer verified (skrl-specific behavior):") - print(f" 
1. Unwrapped env: expects actions on {sim_device}, returns data on {sim_device}") - print(f" 2. Wrapper: accepts actions on {rl_device_obj} (from policy)") - print(f" 3. Wrapper: internally transfers actions to {sim_device} for env.step()") - print(f" 4. Wrapper: returns outputs on {sim_device} (policy handles obs device transfer)") - print(" Note: Unlike RSL-RL/RL-Games, skrl keeps observations on sim_device") - print("-" * 80) # ============================================================================ From ec417d443d4c465f8318d318f2705b5fed1e3151 Mon Sep 17 00:00:00 2001 From: Kelly Guo Date: Mon, 10 Nov 2025 19:42:14 -0800 Subject: [PATCH 7/7] format --- .../test/test_rl_device_separation.py | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/source/isaaclab_tasks/test/test_rl_device_separation.py b/source/isaaclab_tasks/test/test_rl_device_separation.py index 2faeabbe1f0..3dc588b3a6c 100644 --- a/source/isaaclab_tasks/test/test_rl_device_separation.py +++ b/source/isaaclab_tasks/test/test_rl_device_separation.py @@ -96,24 +96,28 @@ def _verify_unwrapped_env(env, sim_device: str): env: Unwrapped gym environment sim_device: Expected simulation device """ - assert env.unwrapped.device == sim_device, \ - f"Environment device mismatch: expected {sim_device}, got {env.unwrapped.device}" + assert ( + env.unwrapped.device == sim_device + ), f"Environment device mismatch: expected {sim_device}, got {env.unwrapped.device}" # Verify reset returns data on sim device obs_dict, _ = env.reset() for key, value in obs_dict.items(): if isinstance(value, torch.Tensor): - assert value.device.type == torch.device(sim_device).type, \ - f"Unwrapped env obs '{key}' should be on {sim_device}, got {value.device}" + assert ( + value.device.type == torch.device(sim_device).type + ), f"Unwrapped env obs '{key}' should be on {sim_device}, got {value.device}" # Verify step returns data on sim device action_space = env.unwrapped.single_action_space test_action = torch.zeros(NUM_ENVS, action_space.shape[0], device=sim_device) obs_dict, rew, term, trunc, extras = env.step(test_action) - assert rew.device.type == torch.device(sim_device).type, \ - f"Unwrapped env rewards should be on {sim_device}, got {rew.device}" - assert term.device.type == torch.device(sim_device).type, \ - f"Unwrapped env terminated should be on {sim_device}, got {term.device}" + assert ( + rew.device.type == torch.device(sim_device).type + ), f"Unwrapped env rewards should be on {sim_device}, got {rew.device}" + assert ( + term.device.type == torch.device(sim_device).type + ), f"Unwrapped env terminated should be on {sim_device}, got {term.device}" def _verify_tensor_device(data, expected_device: str, name: str): @@ -125,13 +129,15 @@ def _verify_tensor_device(data, expected_device: str, name: str): name: Name for error messages """ if isinstance(data, torch.Tensor): - assert data.device.type == torch.device(expected_device).type, \ - f"{name} should be on {expected_device}, got {data.device}" + assert ( + data.device.type == torch.device(expected_device).type + ), f"{name} should be on {expected_device}, got {data.device}" elif isinstance(data, dict): for key, value in data.items(): if isinstance(value, torch.Tensor): - assert value.device.type == torch.device(expected_device).type, \ - f"{name}['{key}'] should be on {expected_device}, got {value.device}" + assert ( + value.device.type == torch.device(expected_device).type + ), f"{name}['{key}'] should be on {expected_device}, got {value.device}" def 
_test_rsl_rl_device_separation(sim_device: str, rl_device: str): @@ -146,6 +152,7 @@ def _test_rsl_rl_device_separation(sim_device: str, rl_device: str): rl_device: Device for RL agent (e.g., "cuda:0", "cpu") - where policy generates actions """ from tensordict import TensorDict + from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper env = _create_env(sim_device) @@ -214,6 +221,7 @@ def _test_sb3_device_separation(sim_device: str): sim_device: Device for simulation (e.g., "cuda:0", "cpu") """ import numpy as np + from isaaclab_rl.sb3 import Sb3VecEnvWrapper env = _create_env(sim_device)
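
With the rl_device plumbing removed from the RSL-RL wrapper, device separation on that path is owned entirely by the runner. A minimal sketch of the resulting pattern, mirroring the train-script change earlier in this series; agent_cfg, log_dir, and an already-created env are assumed to be in scope, and the runner call follows the standard rsl_rl OnPolicyRunner signature:

    from rsl_rl.runners import OnPolicyRunner

    from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper

    # The wrapper no longer accepts rl_device: it stays on the simulation device.
    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
    assert env.device == env.unwrapped.device  # e.g., "cpu" for CPU simulation

    # The runner owns the RL device: it moves observations and rewards to
    # agent_cfg.device (e.g., "cuda:0") and produces actions there.
    runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
    runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True)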