diff --git a/scripts/demos/pick_and_place.py b/scripts/demos/pick_and_place.py index cc14dcb0a72..f4b3bc8ef87 100644 --- a/scripts/demos/pick_and_place.py +++ b/scripts/demos/pick_and_place.py @@ -11,6 +11,7 @@ # add argparse arguments parser = argparse.ArgumentParser(description="Keyboard control for Isaac Lab Pick and Place.") +parser.add_argument("--num_envs", type=int, default=32, help="Number of environments to spawn.") # append AppLauncher cli args AppLauncher.add_app_launcher_args(parser) # parse the arguments @@ -59,11 +60,22 @@ class PickAndPlaceEnvCfg(DirectRLEnvCfg): action_space = 4 observation_space = 6 state_space = 0 - device = "cpu" - # Simulation cfg. Note that we are forcing the simulation to run on CPU. - # This is because the surface gripper API is only supported on CPU backend for now. - sim: SimulationCfg = SimulationCfg(dt=1 / 60, render_interval=decimation, device="cpu") + # Simulation cfg. We run physics on GPU but enable CPU readback so that + # data is automatically available on CPU for the task/policy. 
+ # sim_device is where physics runs (cuda for performance) + # task_device is where data buffers are allocated (cpu for convenience) + sim: SimulationCfg = SimulationCfg( + dt=1 / 60, + device=args_cli.device, # Physics simulation runs on input device (GPU by default) + render_interval=decimation, + use_fabric=True, + enable_scene_query_support=True, + enable_cpu_readback=True, # Data automatically copied to CPU + ) + # Task device - where tensor operations and data buffers live + # This should match where the simulation data is returned (CPU when enable_cpu_readback=True) + device: str = "cpu" debug_vis = True # robot @@ -136,8 +148,8 @@ def __init__(self, cfg: PickAndPlaceEnvCfg, render_mode: str | None = None, **kw self.joint_vel = self.pick_and_place.data.joint_vel # Buffers - self.go_to_cube = False - self.go_to_target = False + self.go_to_cube = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + self.go_to_target = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) self.target_pos = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float32) self.instant_controls = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float32) self.permanent_controls = torch.zeros((self.num_envs, 1), device=self.device, dtype=torch.float32) @@ -173,35 +185,36 @@ def set_up_keyboard(self): print("Keyboard set up!") print("The simulation is ready for you to try it out!") print("Your goal is pick up the purple cube and to drop it on the red sphere!") - print("Use the following controls to interact with the simulation:") - print("Press the 'A' key to have the gripper track the cube position.") - print("Press the 'D' key to have the gripper track the target position") - print("Press the 'W' or 'S' keys to move the gantry UP or DOWN respectively") - print("Press 'Q' or 'E' to OPEN or CLOSE the gripper respectively") + print(f"Number of environments: {self.num_envs}") + print("Use the following controls to interact with ALL 
environments simultaneously:") + print("Press the 'A' key to have all grippers track the cube position.") + print("Press the 'D' key to have all grippers track the target position") + print("Press the 'W' or 'S' keys to move all gantries UP or DOWN respectively") + print("Press 'Q' or 'E' to OPEN or CLOSE all grippers respectively") def _on_keyboard_event(self, event): """Checks for a keyboard event and assign the corresponding command control depending on key pressed.""" if event.type == carb.input.KeyboardEventType.KEY_PRESS: - # Logic on key press + # Logic on key press - apply to ALL environments if event.input.name == self._auto_aim_target: - self.go_to_target = True - self.go_to_cube = False + self.go_to_target[:] = True + self.go_to_cube[:] = False if event.input.name == self._auto_aim_cube: - self.go_to_cube = True - self.go_to_target = False + self.go_to_cube[:] = True + self.go_to_target[:] = False if event.input.name in self._instant_key_controls: - self.go_to_cube = False - self.go_to_target = False - self.instant_controls[0] = self._instant_key_controls[event.input.name] + self.go_to_cube[:] = False + self.go_to_target[:] = False + self.instant_controls[:] = self._instant_key_controls[event.input.name] if event.input.name in self._permanent_key_controls: - self.go_to_cube = False - self.go_to_target = False - self.permanent_controls[0] = self._permanent_key_controls[event.input.name] - # On key release, the robot stops moving + self.go_to_cube[:] = False + self.go_to_target[:] = False + self.permanent_controls[:] = self._permanent_key_controls[event.input.name] + # On key release, all robots stop moving elif event.type == carb.input.KeyboardEventType.KEY_RELEASE: - self.go_to_cube = False - self.go_to_target = False - self.instant_controls[0] = self._instant_key_controls["ZEROS"] + self.go_to_cube[:] = False + self.go_to_target[:] = False + self.instant_controls[:] = self._instant_key_controls["ZEROS"] def _setup_scene(self): self.pick_and_place = 
Articulation(self.cfg.robot_cfg) @@ -225,28 +238,31 @@ def _pre_physics_step(self, actions: torch.Tensor) -> None: def _apply_action(self) -> None: # We use the keyboard outputs as an action. - if self.go_to_cube: - # Effort based proportional controller to track the cube position - head_pos_x = self.pick_and_place.data.joint_pos[:, self._x_dof_idx[0]] - head_pos_y = self.pick_and_place.data.joint_pos[:, self._y_dof_idx[0]] - cube_pos_x = self.cube.data.root_pos_w[:, 0] - self.scene.env_origins[:, 0] - cube_pos_y = self.cube.data.root_pos_w[:, 1] - self.scene.env_origins[:, 1] - d_cube_robot_x = cube_pos_x - head_pos_x - d_cube_robot_y = cube_pos_y - head_pos_y - self.instant_controls[0] = torch.tensor( - [d_cube_robot_x * 5.0, d_cube_robot_y * 5.0, 0.0], device=self.device - ) - elif self.go_to_target: - # Effort based proportional controller to track the target position - head_pos_x = self.pick_and_place.data.joint_pos[:, self._x_dof_idx[0]] - head_pos_y = self.pick_and_place.data.joint_pos[:, self._y_dof_idx[0]] - target_pos_x = self.target_pos[:, 0] - target_pos_y = self.target_pos[:, 1] - d_target_robot_x = target_pos_x - head_pos_x - d_target_robot_y = target_pos_y - head_pos_y - self.instant_controls[0] = torch.tensor( - [d_target_robot_x * 5.0, d_target_robot_y * 5.0, 0.0], device=self.device - ) + # Process each environment independently + for env_idx in range(self.num_envs): + if self.go_to_cube[env_idx]: + # Effort based proportional controller to track the cube position + head_pos_x = self.pick_and_place.data.joint_pos[env_idx, self._x_dof_idx[0]] + head_pos_y = self.pick_and_place.data.joint_pos[env_idx, self._y_dof_idx[0]] + cube_pos_x = self.cube.data.root_pos_w[env_idx, 0] - self.scene.env_origins[env_idx, 0] + cube_pos_y = self.cube.data.root_pos_w[env_idx, 1] - self.scene.env_origins[env_idx, 1] + d_cube_robot_x = cube_pos_x - head_pos_x + d_cube_robot_y = cube_pos_y - head_pos_y + self.instant_controls[env_idx] = torch.tensor( + [d_cube_robot_x * 
5.0, d_cube_robot_y * 5.0, 0.0], device=self.device + ) + elif self.go_to_target[env_idx]: + # Effort based proportional controller to track the target position + head_pos_x = self.pick_and_place.data.joint_pos[env_idx, self._x_dof_idx[0]] + head_pos_y = self.pick_and_place.data.joint_pos[env_idx, self._y_dof_idx[0]] + target_pos_x = self.target_pos[env_idx, 0] + target_pos_y = self.target_pos[env_idx, 1] + d_target_robot_x = target_pos_x - head_pos_x + d_target_robot_y = target_pos_y - head_pos_y + self.instant_controls[env_idx] = torch.tensor( + [d_target_robot_x * 5.0, d_target_robot_y * 5.0, 0.0], device=self.device + ) + # Set the joint effort targets for the picker self.pick_and_place.set_joint_effort_target( self.instant_controls[:, 0].unsqueeze(dim=1), joint_ids=self._x_dof_idx @@ -258,7 +274,7 @@ def _apply_action(self) -> None: self.permanent_controls[:, 0].unsqueeze(dim=1), joint_ids=self._z_dof_idx ) # Set the gripper command - self.gripper.set_grippers_command(self.instant_controls[:, 2].unsqueeze(dim=1)) + self.gripper.set_grippers_command(self.instant_controls[:, 2]) def _get_observations(self) -> dict: # Get the observations @@ -397,8 +413,11 @@ def _debug_vis_callback(self, event): def main(): """Main function.""" + # create environment configuration + env_cfg = PickAndPlaceEnvCfg() + env_cfg.scene.num_envs = args_cli.num_envs # create environment - pick_and_place = PickAndPlaceEnv(PickAndPlaceEnvCfg()) + pick_and_place = PickAndPlaceEnv(env_cfg) obs, _ = pick_and_place.reset() while simulation_app.is_running(): # check for selected robots diff --git a/scripts/reinforcement_learning/rl_games/play.py b/scripts/reinforcement_learning/rl_games/play.py index d6faec37316..135980e92c7 100644 --- a/scripts/reinforcement_learning/rl_games/play.py +++ b/scripts/reinforcement_learning/rl_games/play.py @@ -95,10 +95,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # override configurations with non-hydra CLI arguments 
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device - # update agent device to match simulation device - if args_cli.device is not None: - agent_cfg["params"]["config"]["device"] = args_cli.device - agent_cfg["params"]["config"]["device_name"] = args_cli.device # randomly sample a seed if seed = -1 if args_cli.seed == -1: diff --git a/scripts/reinforcement_learning/rl_games/train.py b/scripts/reinforcement_learning/rl_games/train.py index 634e5975676..d6900a3789f 100644 --- a/scripts/reinforcement_learning/rl_games/train.py +++ b/scripts/reinforcement_learning/rl_games/train.py @@ -102,11 +102,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen "Please use GPU device (e.g., --device cuda) for distributed training." ) - # update agent device to match simulation device - if args_cli.device is not None: - agent_cfg["params"]["config"]["device"] = args_cli.device - agent_cfg["params"]["config"]["device_name"] = args_cli.device - # randomly sample a seed if seed = -1 if args_cli.seed == -1: args_cli.seed = random.randint(0, 10000) diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py index 8b66feb28aa..ad739f4559a 100644 --- a/scripts/reinforcement_learning/rsl_rl/train.py +++ b/scripts/reinforcement_learning/rsl_rl/train.py @@ -182,7 +182,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen env = gym.wrappers.RecordVideo(env, **video_kwargs) # wrap around environment for rsl-rl - env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) + env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions, rl_device=agent_cfg.device) # create runner from rsl-rl if agent_cfg.class_name == "OnPolicyRunner": diff --git a/source/isaaclab/config/extension.toml b/source/isaaclab/config/extension.toml index 
9d0173b3702..f33f3f354b6 100644 --- a/source/isaaclab/config/extension.toml +++ b/source/isaaclab/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.47.10" +version = "0.48.0" # Description title = "Isaac Lab framework for Robot Learning" diff --git a/source/isaaclab/docs/CHANGELOG.rst b/source/isaaclab/docs/CHANGELOG.rst index b0f1719d722..4acbc1c9102 100644 --- a/source/isaaclab/docs/CHANGELOG.rst +++ b/source/isaaclab/docs/CHANGELOG.rst @@ -2,6 +2,55 @@ Changelog --------- +0.48.0 (2025-11-07) +~~~~~~~~~~~~~~~~~~~ + +Added +^^^^^ + +* Added ``enable_cpu_readback`` parameter to :class:`~isaaclab.sim.SimulationCfg` to control whether physics data + is automatically copied from GPU to CPU. When enabled with GPU simulation, allows data to be returned on CPU + while physics runs on GPU. +* Added ``device`` parameter to :class:`~isaaclab.scene.InteractiveScene` to explicitly specify device for scene + tensor allocation, enabling proper device separation between simulation and task/environment. +* Added ``device`` configuration field to :class:`~isaaclab.envs.DirectRLEnvCfg`, + :class:`~isaaclab.envs.DirectMARLEnvCfg`, and :class:`~isaaclab.envs.ManagerBasedEnvCfg` to allow explicit + control of task device independent from simulation device. +* Added simulation device information to environment initialization print output for better visibility of the + three-layer device architecture (simulation device, environment device, training device). + +Changed +^^^^^^^ + +* Modified :class:`~isaaclab.assets.SurfaceGripper` to support GPU simulation with CPU readback. Now validates + that either simulation runs on CPU or ``enable_cpu_readback=True`` is set for GPU simulation. 
+* Updated all environment classes (:class:`~isaaclab.envs.DirectRLEnv`, :class:`~isaaclab.envs.DirectMARLEnv`, + :class:`~isaaclab.envs.ManagerBasedEnv`) to pass task device to :class:`~isaaclab.scene.InteractiveScene` + for proper device initialization. +* Updated RL training scripts (RSL-RL, RL-Games, skrl, Stable-Baselines3) to decouple simulation device (``--device`` flag) + from RL training device. RL training device now uses configuration defaults unless in distributed mode. +* Enhanced RL library wrappers (:class:`~isaaclab_rl.rsl_rl.RslRlVecEnvWrapper`, + :class:`~isaaclab_rl.rl_games.RlGamesVecEnvWrapper`) to properly handle device transfers between environment + device and RL training device. + +Fixed +^^^^^ + +* Fixed device mismatch issues when using ``enable_cpu_readback=True`` by ensuring ``scene.env_origins`` and + other scene tensors are allocated on the correct task device. +* Fixed RL-Games wrapper to properly transfer observations from environment device to RL device in addition + to existing action transfers. +* Fixed environment buffers (``reset_buf``, ``episode_length_buf``) in :class:`~isaaclab.envs.DirectRLEnv`, + :class:`~isaaclab.envs.DirectMARLEnv`, and :class:`~isaaclab.envs.ManagerBasedRLEnv` to be allocated on + environment device instead of simulation device. +* Fixed environment device property in all environment classes to automatically default to CPU when + ``enable_cpu_readback=True`` is set, ensuring ``env_ids`` and other environment buffers are created on + the correct device without requiring explicit ``device`` configuration. +* Fixed ``episode_length_buf`` initialization in :class:`~isaaclab.envs.ManagerBasedRLEnv` to respect + ``enable_cpu_readback`` setting, preventing device mismatch errors in termination manager when using + CPU readback with GPU simulation. 
+ + 0.47.10 (2025-11-06) ~~~~~~~~~~~~~~~~~~~~ diff --git a/source/isaaclab/isaaclab/assets/surface_gripper/surface_gripper.py b/source/isaaclab/isaaclab/assets/surface_gripper/surface_gripper.py index 50a17d85efe..3f01b41e74d 100644 --- a/source/isaaclab/isaaclab/assets/surface_gripper/surface_gripper.py +++ b/source/isaaclab/isaaclab/assets/surface_gripper/surface_gripper.py @@ -42,8 +42,10 @@ class SurfaceGripper(AssetBase): function is called automatically for every simulation step, and does not need to be called by the user. Note: - The SurfaceGripper is only supported on CPU for now. Please set the simulation backend to run on CPU. - Use `--device cpu` to run the simulation on CPU. + The SurfaceGripper requires data on CPU. You can either: + + 1. Run simulation on CPU: ``sim.device='cpu'`` + 2. Run simulation on GPU with CPU readback: ``sim.device='cuda:0'`` and ``sim.enable_cpu_readback=True`` """ def __init__(self, cfg: SurfaceGripperCfg): @@ -243,22 +245,32 @@ def _initialize_impl(self) -> None: """Initializes the gripper-related handles and internal buffers. Raises: - ValueError: If the simulation backend is not CPU. + ValueError: If GPU simulation is used without CPU readback enabled. RuntimeError: If the Simulation Context is not initialized or if gripper prims are not found. Note: - The SurfaceGripper is only supported on CPU for now. Please set the simulation backend to run on CPU. - Use `--device cpu` to run the simulation on CPU. + The SurfaceGripper requires data on CPU. When using GPU physics (``sim.device='cuda:0'``), + you must enable CPU readback (``sim.enable_cpu_readback=True``) so that data is automatically + copied to CPU. """ enable_extension("isaacsim.robot.surface_gripper") from isaacsim.robot.surface_gripper import GripperView - # Check that we are using the CPU backend. - if self._device != "cpu": - raise Exception( - "SurfaceGripper is only supported on CPU for now. Please set the simulation backend to run on CPU. 
Use" - " `--device cpu` to run the simulation on CPU." + # Check that if GPU simulation is used, CPU readback must be enabled + # SurfaceGripper needs data on CPU, so either: + # 1. Simulation on CPU (self._device == "cpu"), or + # 2. Simulation on GPU with enable_cpu_readback=True (data returned on CPU) + sim_device = sim_utils.SimulationContext.instance().cfg.device + enable_cpu_readback = sim_utils.SimulationContext.instance().cfg.enable_cpu_readback + + if "cuda" in sim_device.lower() and not enable_cpu_readback: + raise ValueError( + f"SurfaceGripper requires data on CPU. Current configuration has simulation device '{sim_device}' " + f"with enable_cpu_readback={enable_cpu_readback}. " + "Please either:\n" + " 1. Set sim.device='cpu', or\n" + " 2. Set sim.enable_cpu_readback=True to run GPU physics with CPU data readback." ) # obtain the first prim in the regex expression (all others are assumed to be a copy of this) diff --git a/source/isaaclab/isaaclab/envs/direct_marl_env.py b/source/isaaclab/isaaclab/envs/direct_marl_env.py index 0e7429117fc..d0424f9769f 100644 --- a/source/isaaclab/isaaclab/envs/direct_marl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_marl_env.py @@ -103,6 +103,7 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar # print useful information print("[INFO]: Base environment:") + print(f"\tSimulation device : {self.sim.device}") print(f"\tEnvironment device : {self.device}") print(f"\tEnvironment seed : {self.cfg.seed}") print(f"\tPhysics step-size : {self.physics_dt}") @@ -121,7 +122,7 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar with Timer("[INFO]: Time taken for scene creation", "scene_creation"): # set the stage context for scene creation steps which use the stage with use_stage(self.sim.get_initial_stage()): - self.scene = InteractiveScene(self.cfg.scene) + self.scene = InteractiveScene(self.cfg.scene, device=self.device) self._setup_scene() 
attach_stage_to_usd_context() print("[INFO]: Scene manager: ", self.scene) @@ -184,7 +185,7 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar self.common_step_counter = 0 # -- init buffers self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long) - self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.sim.device) + self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) # setup the observation, state and action spaces self._configure_env_spaces() @@ -266,7 +267,19 @@ def step_dt(self) -> float: @property def device(self): - """The device on which the environment is running.""" + """The device on which the task computations are performed. + + This can be different from :attr:`sim.device` when using CPU readback. + For example, physics can run on GPU while task buffers are on CPU. + """ + # If device is explicitly set in config, use that + if hasattr(self.cfg, "device") and self.cfg.device is not None: + return self.cfg.device + # If CPU readback is enabled, default to CPU for environment device + # since simulation data will be automatically copied to CPU + if self.cfg.sim.enable_cpu_readback: + return "cpu" + # Otherwise fall back to simulation device return self.sim.device @property diff --git a/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py b/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py index 15f57cb4c03..abe69e06f97 100644 --- a/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py +++ b/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py @@ -29,6 +29,21 @@ class DirectMARLEnvCfg: sim: SimulationCfg = SimulationCfg() """Physics simulation configuration. Default is SimulationCfg().""" + device: str | None = None + """Device for task computations (e.g., 'cuda:0', 'cpu'). Default is None. + + If None, resolves to 'cpu' when :attr:`sim.enable_cpu_readback` is True, otherwise to :attr:`sim.device`. 
+ + This parameter allows separating the physics simulation device from the device where + task buffers and computations occur. For example, you can run physics on GPU + (:attr:`sim.device` = 'cuda:0') while keeping task data on CPU (:attr:`device` = 'cpu') + by enabling CPU readback (:attr:`sim.enable_cpu_readback` = True). + + Note: + When using :attr:`sim.enable_cpu_readback` = True with GPU physics, this should + be set to 'cpu' since simulation data will be returned on CPU. + """ + # ui settings ui_window_class_type: type | None = BaseEnvWindow """The class type of the UI window. Default is None. diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env.py b/source/isaaclab/isaaclab/envs/direct_rl_env.py index a4452b707d7..1c24cf39694 100644 --- a/source/isaaclab/isaaclab/envs/direct_rl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_rl_env.py @@ -110,6 +110,7 @@ def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs # print useful information print("[INFO]: Base environment:") + print(f"\tSimulation device : {self.sim.device}") print(f"\tEnvironment device : {self.device}") print(f"\tEnvironment seed : {self.cfg.seed}") print(f"\tPhysics step-size : {self.physics_dt}") @@ -128,7 +129,7 @@ def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs with Timer("[INFO]: Time taken for scene creation", "scene_creation"): # set the stage context for scene creation steps which use the stage with use_stage(self.sim.get_initial_stage()): - self.scene = InteractiveScene(self.cfg.scene) + self.scene = InteractiveScene(self.cfg.scene, device=self.device) self._setup_scene() attach_stage_to_usd_context() print("[INFO]: Scene manager: ", self.scene) @@ -193,7 +194,7 @@ def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long) self.reset_terminated = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool) 
self.reset_time_outs = torch.zeros_like(self.reset_terminated) - self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.sim.device) + self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) # setup the action and observation spaces for Gym self._configure_gym_env_spaces() @@ -268,7 +269,19 @@ def step_dt(self) -> float: @property def device(self): - """The device on which the environment is running.""" + """The device on which the task computations are performed. + + This can be different from :attr:`sim.device` when using CPU readback. + For example, physics can run on GPU while task buffers are on CPU. + """ + # If device is explicitly set in config, use that + if hasattr(self.cfg, "device") and self.cfg.device is not None: + return self.cfg.device + # If CPU readback is enabled, default to CPU for environment device + # since simulation data will be automatically copied to CPU + if self.cfg.sim.enable_cpu_readback: + return "cpu" + # Otherwise fall back to simulation device return self.sim.device @property diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py b/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py index b378beaa86f..1bae9a2176c 100644 --- a/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py +++ b/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py @@ -29,6 +29,21 @@ class DirectRLEnvCfg: sim: SimulationCfg = SimulationCfg() """Physics simulation configuration. Default is SimulationCfg().""" + device: str | None = None + """Device for task computations (e.g., 'cuda:0', 'cpu'). Default is None. + + If None, resolves to 'cpu' when :attr:`sim.enable_cpu_readback` is True, otherwise to :attr:`sim.device`. + + This parameter allows separating the physics simulation device from the device where + task buffers and computations occur. 
For example, you can run physics on GPU + (:attr:`sim.device` = 'cuda:0') while keeping task data on CPU (:attr:`device` = 'cpu') + by enabling CPU readback (:attr:`sim.enable_cpu_readback` = True). + + Note: + When using :attr:`sim.enable_cpu_readback` = True with GPU physics, this should + be set to 'cpu' since simulation data will be returned on CPU. + """ + # ui settings ui_window_class_type: type | None = BaseEnvWindow """The class type of the UI window. Default is None. diff --git a/source/isaaclab/isaaclab/envs/manager_based_env.py b/source/isaaclab/isaaclab/envs/manager_based_env.py index 455af1e2c6b..3132ca949d3 100644 --- a/source/isaaclab/isaaclab/envs/manager_based_env.py +++ b/source/isaaclab/isaaclab/envs/manager_based_env.py @@ -110,6 +110,7 @@ def __init__(self, cfg: ManagerBasedEnvCfg): # print useful information print("[INFO]: Base environment:") + print(f"\tSimulation device : {self.sim.device}") print(f"\tEnvironment device : {self.device}") print(f"\tEnvironment seed : {self.cfg.seed}") print(f"\tPhysics step-size : {self.physics_dt}") @@ -134,7 +135,7 @@ def __init__(self, cfg: ManagerBasedEnvCfg): with Timer("[INFO]: Time taken for scene creation", "scene_creation"): # set the stage context for scene creation steps which use the stage with use_stage(self.sim.get_initial_stage()): - self.scene = InteractiveScene(self.cfg.scene) + self.scene = InteractiveScene(self.cfg.scene, device=self.device) attach_stage_to_usd_context() print("[INFO]: Scene manager: ", self.scene) @@ -236,7 +237,19 @@ def step_dt(self) -> float: @property def device(self): - """The device on which the environment is running.""" + """The device on which the task computations are performed. + + This can be different from :attr:`sim.device` when using CPU readback. + For example, physics can run on GPU while task buffers are on CPU. 
+ """ + # If device is explicitly set in config, use that + if hasattr(self.cfg, "device") and self.cfg.device is not None: + return self.cfg.device + # If CPU readback is enabled, default to CPU for environment device + # since simulation data will be automatically copied to CPU + if self.cfg.sim.enable_cpu_readback: + return "cpu" + # Otherwise fall back to simulation device return self.sim.device @property diff --git a/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py b/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py index 03353baf34d..22ead032bc7 100644 --- a/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py +++ b/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py @@ -46,6 +46,21 @@ class ManagerBasedEnvCfg: sim: SimulationCfg = SimulationCfg() """Physics simulation configuration. Default is SimulationCfg().""" + device: str | None = None + """Device for task computations (e.g., 'cuda:0', 'cpu'). Default is None. + + If None, resolves to 'cpu' when :attr:`sim.enable_cpu_readback` is True, otherwise to :attr:`sim.device`. + + This parameter allows separating the physics simulation device from the device where + task buffers and computations occur. For example, you can run physics on GPU + (:attr:`sim.device` = 'cuda:0') while keeping task data on CPU (:attr:`device` = 'cpu') + by enabling CPU readback (:attr:`sim.enable_cpu_readback` = True). + + Note: + When using :attr:`sim.enable_cpu_readback` = True with GPU physics, this should + be set to 'cpu' since simulation data will be returned on CPU. + """ + # ui settings ui_window_class_type: type | None = BaseEnvWindow """The class type of the UI window. Default is None. 
diff --git a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py index 861072dec0a..81ddbdc501d 100644 --- a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py +++ b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py @@ -76,7 +76,16 @@ def __init__(self, cfg: ManagerBasedRLEnvCfg, render_mode: str | None = None, ** self.common_step_counter = 0 # initialize the episode length buffer BEFORE loading the managers to use it in mdp functions. - self.episode_length_buf = torch.zeros(cfg.scene.num_envs, device=cfg.sim.device, dtype=torch.long) + # Note: This needs to be on the environment device, not the simulation device + # We compute device from cfg directly since self.cfg is not set yet + if hasattr(cfg, "device") and cfg.device is not None: + device = cfg.device + elif cfg.sim.enable_cpu_readback: + # If CPU readback is enabled, default to CPU for environment device + device = "cpu" + else: + device = cfg.sim.device + self.episode_length_buf = torch.zeros(cfg.scene.num_envs, device=device, dtype=torch.long) # initialize the base class to setup the scene. super().__init__(cfg=cfg) diff --git a/source/isaaclab/isaaclab/scene/interactive_scene.py b/source/isaaclab/isaaclab/scene/interactive_scene.py index 15739c33ad7..723bf11f80e 100644 --- a/source/isaaclab/isaaclab/scene/interactive_scene.py +++ b/source/isaaclab/isaaclab/scene/interactive_scene.py @@ -107,16 +107,18 @@ class MySceneCfg(InteractiveSceneCfg): for more details. """ - def __init__(self, cfg: InteractiveSceneCfg): + def __init__(self, cfg: InteractiveSceneCfg, device: str | None = None): """Initializes the scene. Args: cfg: The configuration class for the scene. + device: The device on which scene tensors should be allocated. If None, defaults to simulation device. 
""" # check that the config is valid cfg.validate() # store inputs self.cfg = cfg + self._device = device # initialize scene elements self._terrain = None self._articulations = dict() @@ -338,6 +340,8 @@ def physics_dt(self) -> float: @property def device(self) -> str: """The device on which the scene is created.""" + if self._device is not None: + return self._device return sim_utils.SimulationContext.instance().device # pyright: ignore [reportOptionalMemberAccess] @property diff --git a/source/isaaclab/isaaclab/sim/simulation_cfg.py b/source/isaaclab/isaaclab/sim/simulation_cfg.py index 380dba26c51..ca10ca26344 100644 --- a/source/isaaclab/isaaclab/sim/simulation_cfg.py +++ b/source/isaaclab/isaaclab/sim/simulation_cfg.py @@ -390,6 +390,26 @@ class SimulationCfg: running under the hood. """ + enable_cpu_readback: bool = False + """Enable/disable automatic readback of physics data to CPU. Default is False. + + When set to :obj:`True`, physics simulation data (positions, velocities, etc.) is + copied to the CPU, making it readily available on the host. This may be necessary for certain + operations that require CPU access to physics data, at the cost of reduced performance. + + When set to :obj:`False` (default), physics data is kept on the GPU and not automatically + copied to the CPU. This provides optimal performance when running GPU-accelerated physics by avoiding + unnecessary memory transfers. + + Note: + This setting is only applicable when :attr:`device` is a CUDA device. When the simulation + device is CPU, this flag is ignored as all data is already on the CPU. + + Setting this to :obj:`True` with a CUDA device allows running physics simulation on the GPU + while still having CPU access to the data, which can be useful when + interfacing with CPU-only code. + """ + physx: PhysxCfg = PhysxCfg() """PhysX solver settings. 
Default is PhysxCfg().""" diff --git a/source/isaaclab/isaaclab/sim/simulation_context.py b/source/isaaclab/isaaclab/sim/simulation_context.py index 83277635acf..9082a24e376 100644 --- a/source/isaaclab/isaaclab/sim/simulation_context.py +++ b/source/isaaclab/isaaclab/sim/simulation_context.py @@ -297,6 +297,10 @@ def __init__(self, cfg: SimulationCfg | None = None): stage=self._initial_stage, ) + # apply cpu readback setting after creating the simulation context + # this overrides the default behavior set by omni_isaac_sim's PhysicsContext + self._apply_cpu_readback_setting() + """ Properties - Override. """ @@ -649,6 +653,30 @@ def clear_instance(cls): Helper Functions """ + def _apply_cpu_readback_setting(self): + """Applies the CPU readback setting from the configuration. + + This method overrides the default suppress readback behavior set by Isaac Sim + based on the user-specified configuration. This allows users to control whether + physics data is automatically copied from device (GPU) to host (CPU). + + When enable_cpu_readback is False (default), data is kept on GPU when simulation is set to GPU. + When explicitly set to True, data will be returned on CPU even when simulation is set to GPU. + + Note: + This setting is only applicable when the simulation device is CUDA. For CPU + simulations, this setting is ignored as data is already on CPU. + """ + # Only apply if user has enabled cpu readback AND device is CUDA + if self.cfg.enable_cpu_readback and "cuda" in self.cfg.device.lower(): + # User wants CPU readback enabled, so we override the default behavior + # Note: enable_cpu_readback=True means suppressReadback=False + set_carb_setting(self.carb_settings, "/physics/suppressReadback", False) + omni.log.info( + "Physics CPU readback enabled: Data will be automatically copied to host (CPU). " + "This may reduce performance but makes data readily available on CPU." 
+ ) + def _apply_physics_settings(self): """Sets various carb physics settings.""" # enable hydra scene-graph instancing diff --git a/source/isaaclab/test/assets/test_surface_gripper.py b/source/isaaclab/test/assets/test_surface_gripper.py index c2f81143f59..e3dc27085b2 100644 --- a/source/isaaclab/test/assets/test_surface_gripper.py +++ b/source/isaaclab/test/assets/test_surface_gripper.py @@ -206,8 +206,8 @@ def test_initialization(sim, num_articulations, device, add_ground_plane) -> Non @pytest.mark.parametrize("device", ["cuda:0"]) @pytest.mark.parametrize("add_ground_plane", [True]) @pytest.mark.isaacsim_ci -def test_raise_error_if_not_cpu(sim, device, add_ground_plane) -> None: - """Test that the SurfaceGripper raises an error if the device is not CPU.""" +def test_raise_error_if_gpu_without_cpu_readback(sim, device, add_ground_plane) -> None: + """Test that the SurfaceGripper raises an error if GPU is used without CPU readback enabled.""" isaac_sim_version = get_version() if int(isaac_sim_version[2]) < 5: return @@ -217,9 +217,61 @@ def test_raise_error_if_not_cpu(sim, device, add_ground_plane) -> None: surface_gripper_cfg, articulation_cfg, num_articulations, device ) - with pytest.raises(Exception): + # Should raise ValueError since GPU sim without enable_cpu_readback=True + with pytest.raises(ValueError, match="SurfaceGripper requires data on CPU"): sim.reset() +@pytest.mark.parametrize("num_articulations", [1]) +@pytest.mark.parametrize("device", ["cuda:0"]) +@pytest.mark.parametrize("add_ground_plane", [True]) +@pytest.mark.isaacsim_ci +def test_gpu_with_cpu_readback(sim, device, add_ground_plane) -> None: + """Test that SurfaceGripper works with GPU simulation when CPU readback is enabled. + + This test verifies that: + 1. GPU simulation with enable_cpu_readback=True works correctly. + 2. The surface gripper can be initialized on GPU with CPU data. + 3. The command and state buffers work correctly in this configuration. 
+ + Args: + num_articulations: The number of articulations to initialize. + device: The device to run the test on. + add_ground_plane: Whether to add a ground plane to the simulation. + """ + isaac_sim_version = get_version() + if int(isaac_sim_version[2]) < 5: + return + + # Set enable_cpu_readback=True for GPU simulation + sim.cfg.enable_cpu_readback = True + + surface_gripper_cfg, articulation_cfg = generate_surface_gripper_cfgs(kinematic_enabled=False) + surface_gripper, articulation, _ = generate_surface_gripper( + surface_gripper_cfg, articulation_cfg, num_articulations, device + ) + + sim.reset() + + assert articulation.is_initialized + assert surface_gripper.is_initialized + + # Check that the command and state buffers have the correct shapes + assert surface_gripper.command.shape == (num_articulations,) + assert surface_gripper.state.shape == (num_articulations,) + + # Check that the command and state are initialized to the correct values + assert surface_gripper.command == 0.0 # Idle command after a reset + assert surface_gripper.state == -1.0 # Open state after a reset + + # Simulate physics + for _ in range(10): + # perform rendering + sim.step() + # update articulation + articulation.update(sim.cfg.dt) + surface_gripper.update(sim.cfg.dt) + + if __name__ == "__main__": pytest.main([__file__, "-v", "--maxfail=1"]) diff --git a/source/isaaclab/test/sim/test_simulation_context.py b/source/isaaclab/test/sim/test_simulation_context.py index f0f783463d2..a1cab081d4c 100644 --- a/source/isaaclab/test/sim/test_simulation_context.py +++ b/source/isaaclab/test/sim/test_simulation_context.py @@ -146,3 +146,115 @@ def test_zero_gravity(): gravity_dir, gravity_mag = sim.get_physics_context().get_gravity() gravity = np.array(gravity_dir) * gravity_mag np.testing.assert_almost_equal(gravity, cfg.gravity) + + +@pytest.mark.isaacsim_ci +def test_cpu_readback_default_cuda(): + """Test default behavior with CUDA device (enable_cpu_readback=False).""" + import carb + + # 
Create simulation context with default settings on CUDA + cfg = SimulationCfg(device="cuda:0") # enable_cpu_readback defaults to False + sim = SimulationContext(cfg) + + # Check the carb setting - default (False) should not override omni_isaac_sim's behavior + # omni_isaac_sim sets suppressReadback=True for CUDA by default + carb_settings = carb.settings.get_settings() + suppress_readback = carb_settings.get_as_bool("/physics/suppressReadback") + + # With default settings (enable_cpu_readback=False), we don't override, so omni_isaac_sim's + # default behavior applies (suppressReadback=True for CUDA) + assert suppress_readback is True, "Default CUDA behavior should have suppressReadback=True" + + +@pytest.mark.isaacsim_ci +def test_cpu_readback_enabled(): + """Test enabling CPU readback (enable_cpu_readback=True).""" + import carb + + # Create simulation context with CPU readback enabled + cfg = SimulationCfg(device="cuda:0", enable_cpu_readback=True) + sim = SimulationContext(cfg) + + # Check the carb setting - should be suppressReadback=False + carb_settings = carb.settings.get_settings() + suppress_readback = carb_settings.get_as_bool("/physics/suppressReadback") + + assert suppress_readback is False, "enable_cpu_readback=True should set suppressReadback=False" + + +@pytest.mark.isaacsim_ci +def test_cpu_readback_disabled(): + """Test with CPU readback disabled (enable_cpu_readback=False, explicit).""" + import carb + + # Create simulation context with CPU readback explicitly disabled + cfg = SimulationCfg(device="cuda:0", enable_cpu_readback=False) + sim = SimulationContext(cfg) + + # Check the carb setting - should use omni_isaac_sim's default (suppressReadback=True) + carb_settings = carb.settings.get_settings() + suppress_readback = carb_settings.get_as_bool("/physics/suppressReadback") + + # enable_cpu_readback=False means we don't override, so default applies + assert suppress_readback is True, "enable_cpu_readback=False should use default 
suppressReadback=True" + + +@pytest.mark.isaacsim_ci +def test_cpu_readback_override(): + """Test that enable_cpu_readback properly overrides omni_isaac_sim's default behavior.""" + import carb + import isaacsim.core.utils.stage as stage_utils + + # First create with default settings + cfg_default = SimulationCfg(device="cuda:0") + sim_default = SimulationContext(cfg_default) + + carb_settings = carb.settings.get_settings() + default_value = carb_settings.get_as_bool("/physics/suppressReadback") + + # Clean up + sim_default.clear_all_callbacks() + sim_default.clear_instance() + + # Create stage again + stage_utils.create_new_stage() + + # Now create with explicit enable_cpu_readback=True (opposite of default) + cfg_override = SimulationCfg(device="cuda:0", enable_cpu_readback=True) + sim_override = SimulationContext(cfg_override) + + override_value = carb_settings.get_as_bool("/physics/suppressReadback") + + # The override should be different from default (if default was True, override should be False) + # enable_cpu_readback=True -> suppressReadback=False + assert override_value is False, "enable_cpu_readback=True should result in suppressReadback=False" + + # If default was True (GPU optimized), then override should be False (CPU readback enabled) + if default_value is True: + assert override_value is False, "Override successfully changed suppressReadback from True to False" + + # Clean up + sim_override.clear_all_callbacks() + sim_override.clear_instance() + + +@pytest.mark.isaacsim_ci +def test_cpu_readback_ignored_on_cpu_device(): + """Test that enable_cpu_readback is ignored when simulation device is CPU.""" + import carb + + # Create simulation context with CPU device and enable_cpu_readback=True + # The flag is a no-op on CPU: the readback override is only applied for CUDA devices + cfg = SimulationCfg(device="cpu", enable_cpu_readback=True) + sim = SimulationContext(cfg) + + # The flag should be ignored for CPU devices + # We can't really check the carb setting as CPU device 
doesn't use suppressReadback + # but we verify that the simulation still initializes successfully + assert sim.device == "cpu", "Simulation device should be CPU" + + # Clean up + sim.clear_all_callbacks() + sim.clear_instance() + diff --git a/source/isaaclab_rl/isaaclab_rl/rl_games/rl_games.py b/source/isaaclab_rl/isaaclab_rl/rl_games/rl_games.py index 8c448c172ac..d60e53bf339 100644 --- a/source/isaaclab_rl/isaaclab_rl/rl_games/rl_games.py +++ b/source/isaaclab_rl/isaaclab_rl/rl_games/rl_games.py @@ -319,6 +319,10 @@ def _process_obs(self, obs_dict: VecEnvObs) -> dict[str, torch.Tensor] | dict[st - ``"obs"``: either a concatenated tensor (``concate_obs_group=True``) or a Dict of group tensors. - ``"states"`` (optional): same structure as above when state groups are configured; omitted otherwise. """ + # move observations to RL device if different from sim device + if self._rl_device != self._sim_device: + obs_dict = {key: obs.to(device=self._rl_device) for key, obs in obs_dict.items()} + # clip the observations for key, obs in obs_dict.items(): obs_dict[key] = torch.clamp(obs, -self._clip_obs, self._clip_obs) diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py index 5b03a7c639b..0d833b6d59e 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py @@ -139,7 +139,15 @@ class RslRlBaseRunnerCfg: """The seed for the experiment. Default is 42.""" device: str = "cuda:0" - """The device for the rl-agent. Default is cuda:0.""" + """The device for the rl-agent. Default is cuda:0. + + This is where the RL policy and training computations occur. This can be different + from the environment device (where task buffers are) and the simulation device + (where physics runs). 
For example: + - sim.device = "cuda:0" (GPU physics) + - env.device = "cpu" (task buffers with CPU readback) + - rl.device = "cuda:0" (RL training on GPU) + """ num_steps_per_env: int = MISSING """The number of steps per environment per update.""" diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py index 73ceae04693..74c4bc93ad8 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py @@ -24,7 +24,7 @@ class RslRlVecEnvWrapper(VecEnv): https://github.com/leggedrobotics/rsl_rl/blob/master/rsl_rl/env/vec_env.py """ - def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | None = None): + def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | None = None, rl_device: str | None = None): """Initializes the wrapper. Note: @@ -33,6 +33,9 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N Args: env: The environment to wrap around. clip_actions: The clipping value for actions. If ``None``, then no clipping is done. + rl_device: The device for RL agent/policy. If ``None``, uses the environment device. + This allows running the RL agent on a different device than the environment. + For example, you can run physics on GPU, have task buffers on CPU, and run RL on GPU. Raises: ValueError: When the environment is not an instance of :class:`ManagerBasedRLEnv` or :class:`DirectRLEnv`. 
@@ -48,11 +51,22 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N # initialize the wrapper self.env = env self.clip_actions = clip_actions + + # store the RL device (where policy/training happens) + # this may be different from env.device (where task buffers are) + if rl_device is None: + self.rl_device = self.unwrapped.device + else: + self.rl_device = rl_device # store information required by wrapper self.num_envs = self.unwrapped.num_envs - self.device = self.unwrapped.device + # RSL-RL accesses self.device to know where the policy should be + self.device = self.rl_device self.max_episode_length = self.unwrapped.max_episode_length + + # track the environment device separately + self.env_device = self.unwrapped.device # obtain dimensions of the environment if hasattr(self.unwrapped, "action_manager"): @@ -139,6 +153,9 @@ def seed(self, seed: int = -1) -> int: # noqa: D102 def reset(self) -> tuple[TensorDict, dict]: # noqa: D102 # reset the environment obs_dict, extras = self.env.reset() + # move observations to RL device if different from env device + if self.rl_device != self.env_device: + obs_dict = {k: v.to(self.rl_device) if isinstance(v, torch.Tensor) else v for k, v in obs_dict.items()} return TensorDict(obs_dict, batch_size=[self.num_envs]), extras def get_observations(self) -> TensorDict: @@ -147,14 +164,26 @@ def get_observations(self) -> TensorDict: obs_dict = self.unwrapped.observation_manager.compute() else: obs_dict = self.unwrapped._get_observations() + # move observations to RL device if different from env device + if self.rl_device != self.env_device: + obs_dict = {k: v.to(self.rl_device) if isinstance(v, torch.Tensor) else v for k, v in obs_dict.items()} return TensorDict(obs_dict, batch_size=[self.num_envs]) def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.Tensor, dict]: + # move actions to env device if coming from different RL device + if self.rl_device != self.env_device: + 
actions = actions.to(self.env_device) # clip actions if self.clip_actions is not None: actions = torch.clamp(actions, -self.clip_actions, self.clip_actions) # record step information obs_dict, rew, terminated, truncated, extras = self.env.step(actions) + # move outputs to RL device if different from env device + if self.rl_device != self.env_device: + obs_dict = {k: v.to(self.rl_device) if isinstance(v, torch.Tensor) else v for k, v in obs_dict.items()} + rew = rew.to(self.rl_device) + terminated = terminated.to(self.rl_device) + truncated = truncated.to(self.rl_device) # compute dones for compatibility with RSL-RL dones = (terminated | truncated).to(dtype=torch.long) # move time out information to the extras dict diff --git a/source/isaaclab_tasks/test/env_test_utils.py b/source/isaaclab_tasks/test/env_test_utils.py index 1034fd9ac92..36bc0495f30 100644 --- a/source/isaaclab_tasks/test/env_test_utils.py +++ b/source/isaaclab_tasks/test/env_test_utils.py @@ -96,6 +96,7 @@ def _run_environments( multi_agent=False, create_stage_in_memory=False, disable_clone_in_fabric=False, + enable_cpu_readback=False, ): """Run all environments and check environments return valid signals. @@ -107,6 +108,7 @@ def _run_environments( multi_agent: Whether the environment is multi-agent. create_stage_in_memory: Whether to create stage in memory. disable_clone_in_fabric: Whether to disable fabric cloning. + enable_cpu_readback: Whether to enable CPU readback for GPU simulations. 
""" # skip test if stage in memory is not supported @@ -114,8 +116,9 @@ def _run_environments( if isaac_sim_version < 5 and create_stage_in_memory: pytest.skip("Stage in memory is not supported in this version of Isaac Sim") - # skip suction gripper environments as they require CPU simulation and cannot be run with GPU simulation - if "Suction" in task_name and device != "cpu": + # skip suction gripper environments if CPU readback is disabled and device is not CPU + # (they were updated to support GPU with CPU readback) + if "Suction" in task_name and device != "cpu" and not enable_cpu_readback: return # skip these environments as they cannot be run with 32 environments within reasonable VRAM @@ -158,6 +161,7 @@ def _run_environments( multi_agent=multi_agent, create_stage_in_memory=create_stage_in_memory, disable_clone_in_fabric=disable_clone_in_fabric, + enable_cpu_readback=enable_cpu_readback, ) print(f""">>> Closing environment: {task_name}""") print("-" * 80) @@ -171,6 +175,7 @@ def _check_random_actions( multi_agent: bool = False, create_stage_in_memory: bool = False, disable_clone_in_fabric: bool = False, + enable_cpu_readback: bool = False, ): """Run random actions and check environments return valid signals. @@ -182,6 +187,7 @@ def _check_random_actions( multi_agent: Whether the environment is multi-agent. create_stage_in_memory: Whether to create stage in memory. disable_clone_in_fabric: Whether to disable fabric cloning. + enable_cpu_readback: Whether to enable CPU readback for GPU simulations. 
""" # create a new context stage, if stage in memory is not enabled if not create_stage_in_memory: @@ -196,6 +202,9 @@ def _check_random_actions( env_cfg.sim.create_stage_in_memory = create_stage_in_memory if disable_clone_in_fabric: env_cfg.scene.clone_in_fabric = False + # enable CPU readback if requested + if enable_cpu_readback: + env_cfg.sim.enable_cpu_readback = True # filter based off multi agents mode and create env if multi_agent: diff --git a/source/isaaclab_tasks/test/test_environments.py b/source/isaaclab_tasks/test/test_environments.py index 2a0c9d4ea52..6f275ade385 100644 --- a/source/isaaclab_tasks/test/test_environments.py +++ b/source/isaaclab_tasks/test/test_environments.py @@ -33,3 +33,16 @@ def test_environments(task_name, num_envs, device): # run environments without stage in memory _run_environments(task_name, device, num_envs, create_stage_in_memory=False) + + +@pytest.mark.parametrize("num_envs, device", [(32, "cuda"), (1, "cuda")]) +@pytest.mark.parametrize("task_name", setup_environment(include_play=False, factory_envs=False, multi_agent=False)) +@pytest.mark.isaacsim_ci +def test_environments_with_cpu_readback(task_name, num_envs, device): + """Test environments with CPU readback enabled. + + This test forces enable_cpu_readback=True for all environments to ensure that + the device separation between simulation (GPU) and environment (CPU) works correctly. + """ + # run environments with CPU readback enabled + _run_environments(task_name, device, num_envs, create_stage_in_memory=False, enable_cpu_readback=True)