diff --git a/scripts/reinforcement_learning/rl_games/play.py b/scripts/reinforcement_learning/rl_games/play.py
index d6faec37316..135980e92c7 100644
--- a/scripts/reinforcement_learning/rl_games/play.py
+++ b/scripts/reinforcement_learning/rl_games/play.py
@@ -95,10 +95,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     # override configurations with non-hydra CLI arguments
     env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
     env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
-    # update agent device to match simulation device
-    if args_cli.device is not None:
-        agent_cfg["params"]["config"]["device"] = args_cli.device
-        agent_cfg["params"]["config"]["device_name"] = args_cli.device
 
     # randomly sample a seed if seed = -1
     if args_cli.seed == -1:
diff --git a/scripts/reinforcement_learning/rl_games/train.py b/scripts/reinforcement_learning/rl_games/train.py
index 634e5975676..d6900a3789f 100644
--- a/scripts/reinforcement_learning/rl_games/train.py
+++ b/scripts/reinforcement_learning/rl_games/train.py
@@ -102,11 +102,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
             "Please use GPU device (e.g., --device cuda) for distributed training."
         )
 
-    # update agent device to match simulation device
-    if args_cli.device is not None:
-        agent_cfg["params"]["config"]["device"] = args_cli.device
-        agent_cfg["params"]["config"]["device_name"] = args_cli.device
-
     # randomly sample a seed if seed = -1
     if args_cli.seed == -1:
         args_cli.seed = random.randint(0, 10000)
diff --git a/source/isaaclab_rl/config/extension.toml b/source/isaaclab_rl/config/extension.toml
index 0e2f31470b6..494f39f7456 100644
--- a/source/isaaclab_rl/config/extension.toml
+++ b/source/isaaclab_rl/config/extension.toml
@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.4.4"
+version = "0.4.5"
 
 # Description
 title = "Isaac Lab RL"
diff --git a/source/isaaclab_rl/docs/CHANGELOG.rst b/source/isaaclab_rl/docs/CHANGELOG.rst
index e3d44a08d96..0305f5a99b1 100644
--- a/source/isaaclab_rl/docs/CHANGELOG.rst
+++ b/source/isaaclab_rl/docs/CHANGELOG.rst
@@ -1,6 +1,16 @@
 Changelog
 ---------
 
+0.4.5 (2025-11-10)
+~~~~~~~~~~~~~~~~~~
+
+Changed
+^^^^^^^
+
+* Added support for decoupling RL device from simulation device in the RL Games wrapper.
+  This allows users to run simulation on one device (e.g., CPU) while running RL training/inference on another device.
+
+
 0.4.4 (2025-10-15)
 ~~~~~~~~~~~~~~~~~~
 
diff --git a/source/isaaclab_rl/isaaclab_rl/rl_games/rl_games.py b/source/isaaclab_rl/isaaclab_rl/rl_games/rl_games.py
index 8c448c172ac..22df1e8bef4 100644
--- a/source/isaaclab_rl/isaaclab_rl/rl_games/rl_games.py
+++ b/source/isaaclab_rl/isaaclab_rl/rl_games/rl_games.py
@@ -319,6 +319,10 @@ def _process_obs(self, obs_dict: VecEnvObs) -> dict[str, torch.Tensor] | dict[st
             - ``"obs"``: either a concatenated tensor (``concate_obs_group=True``) or a Dict of group tensors.
             - ``"states"`` (optional): same structure as above when state groups are configured; omitted otherwise.
         """
+        # move observations to RL device if different from sim device
+        if self._rl_device != self._sim_device:
+            obs_dict = {key: obs.to(device=self._rl_device) for key, obs in obs_dict.items()}
+
         # clip the observations
         for key, obs in obs_dict.items():
             obs_dict[key] = torch.clamp(obs, -self._clip_obs, self._clip_obs)
diff --git a/source/isaaclab_tasks/test/test_rl_device_separation.py b/source/isaaclab_tasks/test/test_rl_device_separation.py
new file mode 100644
index 00000000000..3dc588b3a6c
--- /dev/null
+++ b/source/isaaclab_tasks/test/test_rl_device_separation.py
@@ -0,0 +1,381 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Test RL device separation across all supported RL libraries.
+
+This test verifies that RL library wrappers correctly handle device transfers when the
+simulation device differs from the RL training device.
+
+Device Architecture:
+    1. sim_device: Where physics simulation runs and environment buffers live
+    2. rl_device: Where policy networks and training computations occur
+
+Test Scenarios:
+    - GPU simulation + GPU RL: Same device (no transfers needed, optimal performance)
+    - GPU simulation + CPU RL: Cross-device transfers (wrapper handles transfers)
+    - CPU simulation + GPU RL: Cross-device transfers (exercised for skrl only)
+
+Each test verifies the wrapper correctly:
+    1. Unwrapped env: operates entirely on sim_device
+    2. Wrapper: accepts actions on rl_device (where policy generates them)
+    3. Wrapper: internally transfers actions from rl_device → sim_device for env.step()
+    4. Wrapper: transfers outputs from sim_device → rl_device (for policy to use)
+
+Tested Libraries:
+    - RSL-RL: TensorDict observations, device separation via OnPolicyRunner (agent_cfg.device)
+      * Wrapper returns data on sim_device, Runner handles transfers to rl_device
+    - RL Games: Dict observations, explicit rl_device parameter in wrapper
+      * Wrapper transfers data from sim_device to rl_device
+    - Stable-Baselines3: Numpy arrays (CPU-only by design)
+      * Wrapper converts tensors to/from numpy on CPU
+    - skrl: Dict observations, uses skrl.config.torch.device for RL device
+      * Wrapper keeps observations on sim_device, only transfers actions
+
+"""
+
+from isaaclab.app import AppLauncher
+
+# launch the simulator
+app_launcher = AppLauncher(headless=True)
+simulation_app = app_launcher.app
+
+"""Rest everything follows."""
+
+from collections.abc import Mapping
+
+import gymnasium as gym
+import torch
+
+import carb
+import omni.usd
+import pytest
+
+import isaaclab_tasks  # noqa: F401
+from isaaclab_tasks.utils.parse_cfg import parse_env_cfg
+
+# Test environment - use Cartpole as it's simple and fast
+TEST_ENV = "Isaac-Cartpole-v0"
+NUM_ENVS = 4
+
+
+def _create_env(sim_device: str):
+    """Create and initialize a test environment.
+
+    Args:
+        sim_device: Device for simulation (e.g., "cuda:0", "cpu")
+
+    Returns:
+        Initialized gym environment
+    """
+    # Create a new stage
+    omni.usd.get_context().new_stage()
+    # Reset the rtx sensors carb setting to False
+    carb.settings.get_settings().set_bool("/isaaclab/render/rtx_sensors", False)
+
+    try:
+        env_cfg = parse_env_cfg(TEST_ENV, device=sim_device, num_envs=NUM_ENVS)
+        env = gym.make(TEST_ENV, cfg=env_cfg)
+    except Exception as e:
+        # Try to close environment on exception
+        if "env" in locals() and hasattr(env, "_is_closed"):
+            env.close()
+        else:
+            if hasattr(e, "obj") and hasattr(e.obj, "_is_closed"):
+                e.obj.close()
+        pytest.fail(f"Failed to set-up the environment for task {TEST_ENV}. Error: {e}")
+
+    # Disable control on stop
+    env.unwrapped.sim._app_control_on_stop_handle = None
+    return env
+
+
+def _verify_unwrapped_env(env, sim_device: str):
+    """Verify unwrapped environment operates entirely on sim_device.
+
+    Args:
+        env: Unwrapped gym environment
+        sim_device: Expected simulation device
+    """
+    assert (
+        env.unwrapped.device == sim_device
+    ), f"Environment device mismatch: expected {sim_device}, got {env.unwrapped.device}"
+
+    # Verify reset returns data on sim device
+    obs_dict, _ = env.reset()
+    for key, value in obs_dict.items():
+        if isinstance(value, torch.Tensor):
+            assert (
+                value.device.type == torch.device(sim_device).type
+            ), f"Unwrapped env obs '{key}' should be on {sim_device}, got {value.device}"
+
+    # Verify step returns data on sim device
+    action_space = env.unwrapped.single_action_space
+    test_action = torch.zeros(NUM_ENVS, action_space.shape[0], device=sim_device)
+    obs_dict, rew, term, trunc, extras = env.step(test_action)
+    assert (
+        rew.device.type == torch.device(sim_device).type
+    ), f"Unwrapped env rewards should be on {sim_device}, got {rew.device}"
+    assert (
+        term.device.type == torch.device(sim_device).type
+    ), f"Unwrapped env terminated should be on {sim_device}, got {term.device}"
+
+
+def _verify_tensor_device(data, expected_device: str, name: str):
+    """Verify tensor or mapping of tensors is on expected device.
+
+    Args:
+        data: Tensor or mapping of tensors (dict, TensorDict); other types are not checked
+        expected_device: Expected device string
+        name: Name for error messages
+    """
+    if isinstance(data, torch.Tensor):
+        assert (
+            data.device.type == torch.device(expected_device).type
+        ), f"{name} should be on {expected_device}, got {data.device}"
+    elif isinstance(data, Mapping):  # TensorDict is a Mapping but not a dict subclass
+        for key, value in data.items():
+            if isinstance(value, torch.Tensor):
+                assert (
+                    value.device.type == torch.device(expected_device).type
+                ), f"{name}['{key}'] should be on {expected_device}, got {value.device}"
+
+
+def _test_rsl_rl_device_separation(sim_device: str, rl_device: str):
+    """Helper function to test RSL-RL with specified device configuration.
+
+    Note: RSL-RL device separation is handled by the OnPolicyRunner, not the wrapper.
+    The wrapper returns observations on sim_device, and the runner handles device transfers.
+    This test verifies the wrapper works correctly when actions come from a different device.
+
+    Args:
+        sim_device: Device for simulation (e.g., "cuda:0", "cpu")
+        rl_device: Device for RL agent (e.g., "cuda:0", "cpu") - where policy generates actions
+    """
+    from tensordict import TensorDict
+
+    from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper
+
+    env = _create_env(sim_device)
+    _verify_unwrapped_env(env, sim_device)
+
+    # Create wrapper - it uses sim_device, runner handles rl_device
+    env = RslRlVecEnvWrapper(env)
+    assert env.device == sim_device, f"Wrapper device should be {sim_device}"
+
+    # Test reset - wrapper returns observations on sim_device
+    obs, extras = env.reset()
+    assert isinstance(obs, TensorDict), f"Expected TensorDict, got {type(obs)}"
+    _verify_tensor_device(obs, sim_device, "Observation")
+
+    # Test step with action from RL device (simulating policy output)
+    # The wrapper should handle transferring action to sim_device internally
+    action = 2 * torch.rand(env.action_space.shape, device=rl_device) - 1
+    obs, reward, dones, extras = env.step(action)
+
+    # Verify outputs are on sim_device (runner would transfer to rl_device)
+    assert isinstance(obs, TensorDict), f"Expected TensorDict, got {type(obs)}"
+    _verify_tensor_device(obs, sim_device, "Step observation")
+    _verify_tensor_device(reward, sim_device, "Reward")
+    _verify_tensor_device(dones, sim_device, "Dones")
+
+    env.close()
+
+
+def _test_rl_games_device_separation(sim_device: str, rl_device: str):
+    """Helper function to test RL Games with specified device configuration.
+
+    Args:
+        sim_device: Device for simulation (e.g., "cuda:0", "cpu")
+        rl_device: Device for RL agent (e.g., "cuda:0", "cpu")
+    """
+    from isaaclab_rl.rl_games import RlGamesVecEnvWrapper
+
+    env = _create_env(sim_device)
+    _verify_unwrapped_env(env, sim_device)
+
+    # Create wrapper
+    env = RlGamesVecEnvWrapper(env, rl_device=rl_device, clip_obs=10.0, clip_actions=1.0)
+
+    # Test reset
+    obs = env.reset()
+    _verify_tensor_device(obs, rl_device, "Observation")
+
+    # Test step with action on RL device
+    action = 2 * torch.rand(NUM_ENVS, *env.action_space.shape, device=rl_device) - 1
+    obs, reward, dones, info = env.step(action)
+
+    # Verify outputs are on RL device
+    _verify_tensor_device(obs, rl_device, "Observation")
+    _verify_tensor_device(reward, rl_device, "Reward")
+    _verify_tensor_device(dones, rl_device, "Dones")
+
+    env.close()
+
+
+def _test_sb3_device_separation(sim_device: str):
+    """Helper function to test Stable-Baselines3 with specified device configuration.
+
+    Note: SB3 always converts to CPU/numpy, so we don't test rl_device parameter.
+
+    Args:
+        sim_device: Device for simulation (e.g., "cuda:0", "cpu")
+    """
+    import numpy as np
+
+    from isaaclab_rl.sb3 import Sb3VecEnvWrapper
+
+    env = _create_env(sim_device)
+    _verify_unwrapped_env(env, sim_device)
+
+    # Create wrapper
+    env = Sb3VecEnvWrapper(env)
+
+    # Test reset - SB3 should return numpy arrays
+    obs = env.reset()
+    assert isinstance(obs, np.ndarray), f"SB3 observations should be numpy arrays, got {type(obs)}"
+
+    # Test step with numpy action
+    action = 2 * np.random.rand(env.num_envs, *env.action_space.shape) - 1
+    obs, reward, done, info = env.step(action)
+
+    # Verify outputs are numpy arrays
+    assert isinstance(obs, np.ndarray), f"Observations should be numpy arrays, got {type(obs)}"
+    assert isinstance(reward, np.ndarray), f"Rewards should be numpy arrays, got {type(reward)}"
+    assert isinstance(done, np.ndarray), f"Dones should be numpy arrays, got {type(done)}"
+
+    env.close()
+
+
+def _test_skrl_device_separation(sim_device: str, rl_device: str):
+    """Helper function to test skrl with specified device configuration.
+
+    Note: skrl uses skrl.config.torch.device for device configuration.
+    Observations remain on sim_device; only actions are transferred from rl_device.
+
+    Args:
+        sim_device: Device for simulation (e.g., "cuda:0", "cpu")
+        rl_device: Device for RL agent (e.g., "cuda:0", "cpu")
+    """
+    try:
+        import skrl
+        from skrl.envs.wrappers.torch import wrap_env
+    except ImportError:
+        pytest.skip("skrl not installed")
+
+    # Configure skrl device
+    skrl.config.torch.device = torch.device(rl_device)
+
+    env = _create_env(sim_device)
+    _verify_unwrapped_env(env, sim_device)
+
+    # Wrap with skrl
+    env = wrap_env(env, wrapper="isaaclab")
+
+    # Test reset
+    obs, info = env.reset()
+    assert isinstance(obs, (dict, torch.Tensor)), f"Observations should be dict or tensor, got {type(obs)}"
+
+    # Test step with action on RL device
+    action = 2 * torch.rand(NUM_ENVS, *env.action_space.shape, device=skrl.config.torch.device) - 1
+    transition = env.step(action)
+
+    # Verify outputs - skrl keeps them on sim_device
+    if len(transition) == 5:
+        obs, reward, terminated, truncated, info = transition
+        _verify_tensor_device(obs, sim_device, "Observation")
+        _verify_tensor_device(reward, sim_device, "Reward")
+        _verify_tensor_device(terminated, sim_device, "Terminated")
+        _verify_tensor_device(truncated, sim_device, "Truncated")
+    elif len(transition) == 4:
+        obs, reward, done, info = transition
+        _verify_tensor_device(obs, sim_device, "Observation")
+        _verify_tensor_device(reward, sim_device, "Reward")
+        _verify_tensor_device(done, sim_device, "Done")
+    else:
+        pytest.fail(f"Unexpected number of return values from step: {len(transition)}")
+
+    env.close()
+
+
+# ============================================================================
+# Test Functions
+# ============================================================================
+
+
+def test_rsl_rl_device_separation_gpu_to_gpu():
+    """Test RSL-RL with GPU simulation and GPU RL (default configuration)."""
+    try:
+        import isaaclab_rl.rsl_rl  # noqa: F401
+    except ImportError:
+        pytest.skip("RSL-RL not installed")
+
+    _test_rsl_rl_device_separation(sim_device="cuda:0", rl_device="cuda:0")
+
+
+def test_rsl_rl_device_separation_gpu_to_cpu():
+    """Test RSL-RL with GPU simulation and CPU RL (cross-device transfer)."""
+    try:
+        import isaaclab_rl.rsl_rl  # noqa: F401
+    except ImportError:
+        pytest.skip("RSL-RL not installed")
+
+    _test_rsl_rl_device_separation(sim_device="cuda:0", rl_device="cpu")
+
+
+def test_rl_games_device_separation_gpu_to_gpu():
+    """Test RL Games with GPU simulation and GPU RL (default configuration)."""
+    try:
+        import isaaclab_rl.rl_games  # noqa: F401
+    except ImportError:
+        pytest.skip("RL Games not installed")
+
+    _test_rl_games_device_separation(sim_device="cuda:0", rl_device="cuda:0")
+
+
+def test_rl_games_device_separation_gpu_to_cpu():
+    """Test RL Games with GPU simulation and CPU RL (cross-device transfer)."""
+    try:
+        import isaaclab_rl.rl_games  # noqa: F401
+    except ImportError:
+        pytest.skip("RL Games not installed")
+
+    _test_rl_games_device_separation(sim_device="cuda:0", rl_device="cpu")
+
+
+def test_sb3_device_separation_gpu():
+    """Test Stable-Baselines3 with GPU simulation.
+
+    Note: SB3 always converts to CPU/numpy, so only GPU simulation is tested.
+    """
+    try:
+        import isaaclab_rl.sb3  # noqa: F401
+    except ImportError:
+        pytest.skip("Stable-Baselines3 not installed")
+
+    _test_sb3_device_separation(sim_device="cuda:0")
+
+
+def test_skrl_device_separation_gpu():
+    """Test skrl with GPU simulation and GPU policy (matching devices)."""
+    try:
+        import skrl  # noqa: F401
+    except ImportError:
+        pytest.skip("skrl not installed")
+
+    _test_skrl_device_separation(sim_device="cuda:0", rl_device="cuda:0")
+
+
+def test_skrl_device_separation_cpu_to_gpu():
+    """Test skrl with CPU simulation and GPU policy.
+
+    Note: Uses skrl.config.torch.device to set the policy device to GPU
+    while the environment runs on CPU.
+    """
+    try:
+        import skrl  # noqa: F401
+    except ImportError:
+        pytest.skip("skrl not installed")
+
+    _test_skrl_device_separation(sim_device="cpu", rl_device="cuda:0")