diff --git a/scripts/demos/pick_and_place.py b/scripts/demos/pick_and_place.py
index cc14dcb0a72..f4b3bc8ef87 100644
--- a/scripts/demos/pick_and_place.py
+++ b/scripts/demos/pick_and_place.py
@@ -11,6 +11,7 @@
 
 # add argparse arguments
 parser = argparse.ArgumentParser(description="Keyboard control for Isaac Lab Pick and Place.")
+parser.add_argument("--num_envs", type=int, default=32, help="Number of environments to spawn.")
 # append AppLauncher cli args
 AppLauncher.add_app_launcher_args(parser)
 # parse the arguments
@@ -59,11 +60,22 @@ class PickAndPlaceEnvCfg(DirectRLEnvCfg):
     action_space = 4
     observation_space = 6
     state_space = 0
-    device = "cpu"
 
-    # Simulation cfg. Note that we are forcing the simulation to run on CPU.
-    # This is because the surface gripper API is only supported on CPU backend for now.
-    sim: SimulationCfg = SimulationCfg(dt=1 / 60, render_interval=decimation, device="cpu")
+    # Simulation cfg. We run physics on GPU but enable CPU readback so that
+    # data is automatically available on CPU for the task/policy.
+    # sim_device is where physics runs (cuda for performance)
+    # task_device is where data buffers are allocated (cpu for convenience)
+    sim: SimulationCfg = SimulationCfg(
+        dt=1 / 60,
+        device=args_cli.device,  # Physics simulation runs on input device (GPU by default)
+        render_interval=decimation,
+        use_fabric=True,
+        enable_scene_query_support=True,
+        enable_cpu_readback=True,  # Data automatically copied to CPU
+    )
+    # Task device - where tensor operations and data buffers live
+    # This should match where the simulation data is returned (CPU when enable_cpu_readback=True)
+    device: str = "cpu"
     debug_vis = True
 
     # robot
@@ -136,8 +148,8 @@ def __init__(self, cfg: PickAndPlaceEnvCfg, render_mode: str | None = None, **kw
         self.joint_vel = self.pick_and_place.data.joint_vel
 
         # Buffers
-        self.go_to_cube = False
-        self.go_to_target = False
+        self.go_to_cube = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device)
+        self.go_to_target = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device)
         self.target_pos = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float32)
         self.instant_controls = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float32)
         self.permanent_controls = torch.zeros((self.num_envs, 1), device=self.device, dtype=torch.float32)
@@ -173,35 +185,36 @@ def set_up_keyboard(self):
         print("Keyboard set up!")
         print("The simulation is ready for you to try it out!")
         print("Your goal is pick up the purple cube and to drop it on the red sphere!")
-        print("Use the following controls to interact with the simulation:")
-        print("Press the 'A' key to have the gripper track the cube position.")
-        print("Press the 'D' key to have the gripper track the target position")
-        print("Press the 'W' or 'S' keys to move the gantry UP or DOWN respectively")
-        print("Press 'Q' or 'E' to OPEN or CLOSE the gripper respectively")
+        print(f"Number of environments: {self.num_envs}")
+        print("Use the following controls to interact with ALL environments simultaneously:")
+        print("Press the 'A' key to have all grippers track the cube position.")
+        print("Press the 'D' key to have all grippers track the target position")
+        print("Press the 'W' or 'S' keys to move all gantries UP or DOWN respectively")
+        print("Press 'Q' or 'E' to OPEN or CLOSE all grippers respectively")
 
     def _on_keyboard_event(self, event):
         """Checks for a keyboard event and assign the corresponding command control depending on key pressed."""
         if event.type == carb.input.KeyboardEventType.KEY_PRESS:
-            # Logic on key press
+            # Logic on key press - apply to ALL environments
             if event.input.name == self._auto_aim_target:
-                self.go_to_target = True
-                self.go_to_cube = False
+                self.go_to_target[:] = True
+                self.go_to_cube[:] = False
             if event.input.name == self._auto_aim_cube:
-                self.go_to_cube = True
-                self.go_to_target = False
+                self.go_to_cube[:] = True
+                self.go_to_target[:] = False
             if event.input.name in self._instant_key_controls:
-                self.go_to_cube = False
-                self.go_to_target = False
-                self.instant_controls[0] = self._instant_key_controls[event.input.name]
+                self.go_to_cube[:] = False
+                self.go_to_target[:] = False
+                self.instant_controls[:] = self._instant_key_controls[event.input.name]
             if event.input.name in self._permanent_key_controls:
-                self.go_to_cube = False
-                self.go_to_target = False
-                self.permanent_controls[0] = self._permanent_key_controls[event.input.name]
-        # On key release, the robot stops moving
+                self.go_to_cube[:] = False
+                self.go_to_target[:] = False
+                self.permanent_controls[:] = self._permanent_key_controls[event.input.name]
+        # On key release, all robots stop moving
         elif event.type == carb.input.KeyboardEventType.KEY_RELEASE:
-            self.go_to_cube = False
-            self.go_to_target = False
-            self.instant_controls[0] = self._instant_key_controls["ZEROS"]
+            self.go_to_cube[:] = False
+            self.go_to_target[:] = False
+            self.instant_controls[:] = self._instant_key_controls["ZEROS"]
 
     def _setup_scene(self):
         self.pick_and_place = Articulation(self.cfg.robot_cfg)
@@ -225,28 +238,31 @@ def _pre_physics_step(self, actions: torch.Tensor) -> None:
 
     def _apply_action(self) -> None:
         # We use the keyboard outputs as an action.
-        if self.go_to_cube:
-            # Effort based proportional controller to track the cube position
-            head_pos_x = self.pick_and_place.data.joint_pos[:, self._x_dof_idx[0]]
-            head_pos_y = self.pick_and_place.data.joint_pos[:, self._y_dof_idx[0]]
-            cube_pos_x = self.cube.data.root_pos_w[:, 0] - self.scene.env_origins[:, 0]
-            cube_pos_y = self.cube.data.root_pos_w[:, 1] - self.scene.env_origins[:, 1]
-            d_cube_robot_x = cube_pos_x - head_pos_x
-            d_cube_robot_y = cube_pos_y - head_pos_y
-            self.instant_controls[0] = torch.tensor(
-                [d_cube_robot_x * 5.0, d_cube_robot_y * 5.0, 0.0], device=self.device
-            )
-        elif self.go_to_target:
-            # Effort based proportional controller to track the target position
-            head_pos_x = self.pick_and_place.data.joint_pos[:, self._x_dof_idx[0]]
-            head_pos_y = self.pick_and_place.data.joint_pos[:, self._y_dof_idx[0]]
-            target_pos_x = self.target_pos[:, 0]
-            target_pos_y = self.target_pos[:, 1]
-            d_target_robot_x = target_pos_x - head_pos_x
-            d_target_robot_y = target_pos_y - head_pos_y
-            self.instant_controls[0] = torch.tensor(
-                [d_target_robot_x * 5.0, d_target_robot_y * 5.0, 0.0], device=self.device
-            )
+        # Process each environment independently
+        for env_idx in range(self.num_envs):
+            if self.go_to_cube[env_idx]:
+                # Effort based proportional controller to track the cube position
+                head_pos_x = self.pick_and_place.data.joint_pos[env_idx, self._x_dof_idx[0]]
+                head_pos_y = self.pick_and_place.data.joint_pos[env_idx, self._y_dof_idx[0]]
+                cube_pos_x = self.cube.data.root_pos_w[env_idx, 0] - self.scene.env_origins[env_idx, 0]
+                cube_pos_y = self.cube.data.root_pos_w[env_idx, 1] - self.scene.env_origins[env_idx, 1]
+                d_cube_robot_x = cube_pos_x - head_pos_x
+                d_cube_robot_y = cube_pos_y - head_pos_y
+                self.instant_controls[env_idx] = torch.tensor(
+                    [d_cube_robot_x * 5.0, d_cube_robot_y * 5.0, 0.0], device=self.device
+                )
+            elif self.go_to_target[env_idx]:
+                # Effort based proportional controller to track the target position
+                head_pos_x = self.pick_and_place.data.joint_pos[env_idx, self._x_dof_idx[0]]
+                head_pos_y = self.pick_and_place.data.joint_pos[env_idx, self._y_dof_idx[0]]
+                target_pos_x = self.target_pos[env_idx, 0]
+                target_pos_y = self.target_pos[env_idx, 1]
+                d_target_robot_x = target_pos_x - head_pos_x
+                d_target_robot_y = target_pos_y - head_pos_y
+                self.instant_controls[env_idx] = torch.tensor(
+                    [d_target_robot_x * 5.0, d_target_robot_y * 5.0, 0.0], device=self.device
+                )
+        
         # Set the joint effort targets for the picker
         self.pick_and_place.set_joint_effort_target(
             self.instant_controls[:, 0].unsqueeze(dim=1), joint_ids=self._x_dof_idx
@@ -258,7 +274,7 @@ def _apply_action(self) -> None:
             self.permanent_controls[:, 0].unsqueeze(dim=1), joint_ids=self._z_dof_idx
         )
         # Set the gripper command
-        self.gripper.set_grippers_command(self.instant_controls[:, 2].unsqueeze(dim=1))
+        self.gripper.set_grippers_command(self.instant_controls[:, 2])
 
     def _get_observations(self) -> dict:
         # Get the observations
@@ -397,8 +413,11 @@ def _debug_vis_callback(self, event):
 
 def main():
     """Main function."""
+    # create environment configuration
+    env_cfg = PickAndPlaceEnvCfg()
+    env_cfg.scene.num_envs = args_cli.num_envs
     # create environment
-    pick_and_place = PickAndPlaceEnv(PickAndPlaceEnvCfg())
+    pick_and_place = PickAndPlaceEnv(env_cfg)
     obs, _ = pick_and_place.reset()
     while simulation_app.is_running():
         # check for selected robots
diff --git a/scripts/reinforcement_learning/rl_games/play.py b/scripts/reinforcement_learning/rl_games/play.py
index d6faec37316..135980e92c7 100644
--- a/scripts/reinforcement_learning/rl_games/play.py
+++ b/scripts/reinforcement_learning/rl_games/play.py
@@ -95,10 +95,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     # override configurations with non-hydra CLI arguments
     env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
     env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
-    # update agent device to match simulation device
-    if args_cli.device is not None:
-        agent_cfg["params"]["config"]["device"] = args_cli.device
-        agent_cfg["params"]["config"]["device_name"] = args_cli.device
 
     # randomly sample a seed if seed = -1
     if args_cli.seed == -1:
diff --git a/scripts/reinforcement_learning/rl_games/train.py b/scripts/reinforcement_learning/rl_games/train.py
index 634e5975676..d6900a3789f 100644
--- a/scripts/reinforcement_learning/rl_games/train.py
+++ b/scripts/reinforcement_learning/rl_games/train.py
@@ -102,11 +102,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
             "Please use GPU device (e.g., --device cuda) for distributed training."
         )
 
-    # update agent device to match simulation device
-    if args_cli.device is not None:
-        agent_cfg["params"]["config"]["device"] = args_cli.device
-        agent_cfg["params"]["config"]["device_name"] = args_cli.device
-
     # randomly sample a seed if seed = -1
     if args_cli.seed == -1:
         args_cli.seed = random.randint(0, 10000)
diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py
index 8b66feb28aa..ad739f4559a 100644
--- a/scripts/reinforcement_learning/rsl_rl/train.py
+++ b/scripts/reinforcement_learning/rsl_rl/train.py
@@ -182,7 +182,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
     # wrap around environment for rsl-rl
-    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
+    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions, rl_device=agent_cfg.device)
 
     # create runner from rsl-rl
     if agent_cfg.class_name == "OnPolicyRunner":
diff --git a/source/isaaclab/config/extension.toml b/source/isaaclab/config/extension.toml
index 9d0173b3702..f33f3f354b6 100644
--- a/source/isaaclab/config/extension.toml
+++ b/source/isaaclab/config/extension.toml
@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.47.10"
+version = "0.48.0"
 
 # Description
 title = "Isaac Lab framework for Robot Learning"
diff --git a/source/isaaclab/docs/CHANGELOG.rst b/source/isaaclab/docs/CHANGELOG.rst
index b0f1719d722..4acbc1c9102 100644
--- a/source/isaaclab/docs/CHANGELOG.rst
+++ b/source/isaaclab/docs/CHANGELOG.rst
@@ -2,6 +2,55 @@ Changelog
 ---------
 
 
+0.48.0 (2025-11-07)
+~~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Added ``enable_cpu_readback`` parameter to :class:`~isaaclab.sim.SimulationCfg` to control whether physics data
+  is automatically copied from GPU to CPU. When enabled with GPU simulation, allows data to be returned on CPU
+  while physics runs on GPU.
+* Added ``device`` parameter to :class:`~isaaclab.scene.InteractiveScene` to explicitly specify device for scene
+  tensor allocation, enabling proper device separation between simulation and task/environment.
+* Added ``device`` configuration field to :class:`~isaaclab.envs.DirectRLEnvCfg`, 
+  :class:`~isaaclab.envs.DirectMARLEnvCfg`, and :class:`~isaaclab.envs.ManagerBasedEnvCfg` to allow explicit
+  control of task device independent from simulation device.
+* Added simulation device information to environment initialization print output for better visibility of the
+  three-layer device architecture (simulation device, environment device, training device).
+
+Changed
+^^^^^^^
+
+* Modified :class:`~isaaclab.assets.SurfaceGripper` to support GPU simulation with CPU readback. Now validates
+  that either simulation runs on CPU or ``enable_cpu_readback=True`` is set for GPU simulation.
+* Updated all environment classes (:class:`~isaaclab.envs.DirectRLEnv`, :class:`~isaaclab.envs.DirectMARLEnv`,
+  :class:`~isaaclab.envs.ManagerBasedEnv`) to pass task device to :class:`~isaaclab.scene.InteractiveScene`
+  for proper device initialization.
+* Updated RL training scripts (RSL-RL, RL-Games, skrl, Stable-Baselines3) to decouple simulation device (``--device`` flag)
+  from RL training device. RL training device now uses configuration defaults unless in distributed mode.
+* Enhanced RL library wrappers (:class:`~isaaclab_rl.rsl_rl.RslRlVecEnvWrapper`, 
+  :class:`~isaaclab_rl.rl_games.RlGamesVecEnvWrapper`) to properly handle device transfers between environment
+  device and RL training device.
+
+Fixed
+^^^^^
+
+* Fixed device mismatch issues when using ``enable_cpu_readback=True`` by ensuring ``scene.env_origins`` and
+  other scene tensors are allocated on the correct task device.
+* Fixed RL-Games wrapper to properly transfer observations from environment device to RL device in addition
+  to existing action transfers.
+* Fixed environment buffers (``reset_buf``, ``episode_length_buf``) in :class:`~isaaclab.envs.DirectRLEnv`,
+  :class:`~isaaclab.envs.DirectMARLEnv`, and :class:`~isaaclab.envs.ManagerBasedRLEnv` to be allocated on
+  environment device instead of simulation device.
+* Fixed environment device property in all environment classes to automatically default to CPU when
+  ``enable_cpu_readback=True`` is set, ensuring ``env_ids`` and other environment buffers are created on
+  the correct device without requiring explicit ``device`` configuration.
+* Fixed ``episode_length_buf`` initialization in :class:`~isaaclab.envs.ManagerBasedRLEnv` to respect
+  ``enable_cpu_readback`` setting, preventing device mismatch errors in termination manager when using
+  CPU readback with GPU simulation.
+
+
 0.47.10 (2025-11-06)
 ~~~~~~~~~~~~~~~~~~~~
 
diff --git a/source/isaaclab/isaaclab/assets/surface_gripper/surface_gripper.py b/source/isaaclab/isaaclab/assets/surface_gripper/surface_gripper.py
index 50a17d85efe..3f01b41e74d 100644
--- a/source/isaaclab/isaaclab/assets/surface_gripper/surface_gripper.py
+++ b/source/isaaclab/isaaclab/assets/surface_gripper/surface_gripper.py
@@ -42,8 +42,10 @@ class SurfaceGripper(AssetBase):
          function is called automatically for every simulation step, and does not need to be called by the user.
 
     Note:
-        The SurfaceGripper is only supported on CPU for now. Please set the simulation backend to run on CPU.
-        Use `--device cpu` to run the simulation on CPU.
+        The SurfaceGripper requires data on CPU. You can either:
+        
+        1. Run simulation on CPU: ``sim.device='cpu'``
+        2. Run simulation on GPU with CPU readback: ``sim.device='cuda:0'`` and ``sim.enable_cpu_readback=True``
     """
 
     def __init__(self, cfg: SurfaceGripperCfg):
@@ -243,22 +245,32 @@ def _initialize_impl(self) -> None:
         """Initializes the gripper-related handles and internal buffers.
 
         Raises:
-            ValueError: If the simulation backend is not CPU.
+            ValueError: If GPU simulation is used without CPU readback enabled.
             RuntimeError: If the Simulation Context is not initialized or if gripper prims are not found.
 
         Note:
-            The SurfaceGripper is only supported on CPU for now. Please set the simulation backend to run on CPU.
-            Use `--device cpu` to run the simulation on CPU.
+            The SurfaceGripper requires data on CPU. When using GPU physics (``sim.device='cuda:0'``),
+            you must enable CPU readback (``sim.enable_cpu_readback=True``) so that data is automatically
+            copied to CPU.
         """
 
         enable_extension("isaacsim.robot.surface_gripper")
         from isaacsim.robot.surface_gripper import GripperView
 
-        # Check that we are using the CPU backend.
-        if self._device != "cpu":
-            raise Exception(
-                "SurfaceGripper is only supported on CPU for now. Please set the simulation backend to run on CPU. Use"
-                " `--device cpu` to run the simulation on CPU."
+        # Check that if GPU simulation is used, CPU readback must be enabled
+        # SurfaceGripper needs data on CPU, so either:
+        # 1. Simulation on CPU (self._device == "cpu"), or
+        # 2. Simulation on GPU with enable_cpu_readback=True (data returned on CPU)
+        sim_device = sim_utils.SimulationContext.instance().cfg.device
+        enable_cpu_readback = sim_utils.SimulationContext.instance().cfg.enable_cpu_readback
+        
+        if "cuda" in sim_device.lower() and not enable_cpu_readback:
+            raise ValueError(
+                f"SurfaceGripper requires data on CPU. Current configuration has simulation device '{sim_device}' "
+                f"with enable_cpu_readback={enable_cpu_readback}. "
+                "Please either:\n"
+                "  1. Set sim.device='cpu', or\n"
+                "  2. Set sim.enable_cpu_readback=True to run GPU physics with CPU data readback."
             )
 
         # obtain the first prim in the regex expression (all others are assumed to be a copy of this)
diff --git a/source/isaaclab/isaaclab/envs/direct_marl_env.py b/source/isaaclab/isaaclab/envs/direct_marl_env.py
index 0e7429117fc..d0424f9769f 100644
--- a/source/isaaclab/isaaclab/envs/direct_marl_env.py
+++ b/source/isaaclab/isaaclab/envs/direct_marl_env.py
@@ -103,6 +103,7 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar
 
         # print useful information
         print("[INFO]: Base environment:")
+        print(f"\tSimulation device     : {self.sim.device}")
         print(f"\tEnvironment device    : {self.device}")
         print(f"\tEnvironment seed      : {self.cfg.seed}")
         print(f"\tPhysics step-size     : {self.physics_dt}")
@@ -121,7 +122,7 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar
         with Timer("[INFO]: Time taken for scene creation", "scene_creation"):
             # set the stage context for scene creation steps which use the stage
             with use_stage(self.sim.get_initial_stage()):
-                self.scene = InteractiveScene(self.cfg.scene)
+                self.scene = InteractiveScene(self.cfg.scene, device=self.device)
                 self._setup_scene()
                 attach_stage_to_usd_context()
         print("[INFO]: Scene manager: ", self.scene)
@@ -184,7 +185,7 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar
         self.common_step_counter = 0
         # -- init buffers
         self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long)
-        self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.sim.device)
+        self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device)
 
         # setup the observation, state and action spaces
         self._configure_env_spaces()
@@ -266,7 +267,19 @@ def step_dt(self) -> float:
 
     @property
     def device(self):
-        """The device on which the environment is running."""
+        """The device on which the task computations are performed.
+
+        This can be different from :attr:`sim.device` when using CPU readback.
+        For example, physics can run on GPU while task buffers are on CPU.
+        """
+        # If device is explicitly set in config, use that
+        if hasattr(self.cfg, "device") and self.cfg.device is not None:
+            return self.cfg.device
+        # If CPU readback is enabled, default to CPU for environment device
+        # since simulation data will be automatically copied to CPU
+        if self.cfg.sim.enable_cpu_readback:
+            return "cpu"
+        # Otherwise fall back to simulation device
         return self.sim.device
 
     @property
diff --git a/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py b/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py
index 15f57cb4c03..abe69e06f97 100644
--- a/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py
+++ b/source/isaaclab/isaaclab/envs/direct_marl_env_cfg.py
@@ -29,6 +29,21 @@ class DirectMARLEnvCfg:
     sim: SimulationCfg = SimulationCfg()
     """Physics simulation configuration. Default is SimulationCfg()."""
 
+    device: str | None = None
+    """Device for task computations (e.g., 'cuda:0', 'cpu'). Default is None.
+
+    If None, the device is inferred from the simulation device (:attr:`sim.device`).
+
+    This parameter allows separating the physics simulation device from the device where
+    task buffers and computations occur. For example, you can run physics on GPU
+    (:attr:`sim.device` = 'cuda:0') while keeping task data on CPU (:attr:`device` = 'cpu')
+    by enabling CPU readback (:attr:`sim.enable_cpu_readback` = True).
+
+    Note:
+        When using :attr:`sim.enable_cpu_readback` = True with GPU physics, this should
+        be set to 'cpu' since simulation data will be returned on CPU.
+    """
+
     # ui settings
     ui_window_class_type: type | None = BaseEnvWindow
     """The class type of the UI window. Default is None.
diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env.py b/source/isaaclab/isaaclab/envs/direct_rl_env.py
index a4452b707d7..1c24cf39694 100644
--- a/source/isaaclab/isaaclab/envs/direct_rl_env.py
+++ b/source/isaaclab/isaaclab/envs/direct_rl_env.py
@@ -110,6 +110,7 @@ def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs
 
         # print useful information
         print("[INFO]: Base environment:")
+        print(f"\tSimulation device     : {self.sim.device}")
         print(f"\tEnvironment device    : {self.device}")
         print(f"\tEnvironment seed      : {self.cfg.seed}")
         print(f"\tPhysics step-size     : {self.physics_dt}")
@@ -128,7 +129,7 @@ def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs
         with Timer("[INFO]: Time taken for scene creation", "scene_creation"):
             # set the stage context for scene creation steps which use the stage
             with use_stage(self.sim.get_initial_stage()):
-                self.scene = InteractiveScene(self.cfg.scene)
+                self.scene = InteractiveScene(self.cfg.scene, device=self.device)
                 self._setup_scene()
                 attach_stage_to_usd_context()
         print("[INFO]: Scene manager: ", self.scene)
@@ -193,7 +194,7 @@ def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs
         self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long)
         self.reset_terminated = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool)
         self.reset_time_outs = torch.zeros_like(self.reset_terminated)
-        self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.sim.device)
+        self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device)
 
         # setup the action and observation spaces for Gym
         self._configure_gym_env_spaces()
@@ -268,7 +269,19 @@ def step_dt(self) -> float:
 
     @property
     def device(self):
-        """The device on which the environment is running."""
+        """The device on which the task computations are performed.
+
+        This can be different from :attr:`sim.device` when using CPU readback.
+        For example, physics can run on GPU while task buffers are on CPU.
+        """
+        # If device is explicitly set in config, use that
+        if hasattr(self.cfg, "device") and self.cfg.device is not None:
+            return self.cfg.device
+        # If CPU readback is enabled, default to CPU for environment device
+        # since simulation data will be automatically copied to CPU
+        if self.cfg.sim.enable_cpu_readback:
+            return "cpu"
+        # Otherwise fall back to simulation device
         return self.sim.device
 
     @property
diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py b/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py
index b378beaa86f..1bae9a2176c 100644
--- a/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py
+++ b/source/isaaclab/isaaclab/envs/direct_rl_env_cfg.py
@@ -29,6 +29,21 @@ class DirectRLEnvCfg:
     sim: SimulationCfg = SimulationCfg()
     """Physics simulation configuration. Default is SimulationCfg()."""
 
+    device: str | None = None
+    """Device for task computations (e.g., 'cuda:0', 'cpu'). Default is None.
+
+    If None, the device is inferred from the simulation device (:attr:`sim.device`).
+
+    This parameter allows separating the physics simulation device from the device where
+    task buffers and computations occur. For example, you can run physics on GPU
+    (:attr:`sim.device` = 'cuda:0') while keeping task data on CPU (:attr:`device` = 'cpu')
+    by enabling CPU readback (:attr:`sim.enable_cpu_readback` = True).
+
+    Note:
+        When using :attr:`sim.enable_cpu_readback` = True with GPU physics, this should
+        be set to 'cpu' since simulation data will be returned on CPU.
+    """
+
     # ui settings
     ui_window_class_type: type | None = BaseEnvWindow
     """The class type of the UI window. Default is None.
diff --git a/source/isaaclab/isaaclab/envs/manager_based_env.py b/source/isaaclab/isaaclab/envs/manager_based_env.py
index 455af1e2c6b..3132ca949d3 100644
--- a/source/isaaclab/isaaclab/envs/manager_based_env.py
+++ b/source/isaaclab/isaaclab/envs/manager_based_env.py
@@ -110,6 +110,7 @@ def __init__(self, cfg: ManagerBasedEnvCfg):
 
         # print useful information
         print("[INFO]: Base environment:")
+        print(f"\tSimulation device     : {self.sim.device}")
         print(f"\tEnvironment device    : {self.device}")
         print(f"\tEnvironment seed      : {self.cfg.seed}")
         print(f"\tPhysics step-size     : {self.physics_dt}")
@@ -134,7 +135,7 @@ def __init__(self, cfg: ManagerBasedEnvCfg):
         with Timer("[INFO]: Time taken for scene creation", "scene_creation"):
             # set the stage context for scene creation steps which use the stage
             with use_stage(self.sim.get_initial_stage()):
-                self.scene = InteractiveScene(self.cfg.scene)
+                self.scene = InteractiveScene(self.cfg.scene, device=self.device)
                 attach_stage_to_usd_context()
         print("[INFO]: Scene manager: ", self.scene)
 
@@ -236,7 +237,19 @@ def step_dt(self) -> float:
 
     @property
     def device(self):
-        """The device on which the environment is running."""
+        """The device on which the task computations are performed.
+
+        This can be different from :attr:`sim.device` when using CPU readback.
+        For example, physics can run on GPU while task buffers are on CPU.
+        """
+        # If device is explicitly set in config, use that
+        if hasattr(self.cfg, "device") and self.cfg.device is not None:
+            return self.cfg.device
+        # If CPU readback is enabled, default to CPU for environment device
+        # since simulation data will be automatically copied to CPU
+        if self.cfg.sim.enable_cpu_readback:
+            return "cpu"
+        # Otherwise fall back to simulation device
         return self.sim.device
 
     @property
diff --git a/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py b/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py
index 03353baf34d..22ead032bc7 100644
--- a/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py
+++ b/source/isaaclab/isaaclab/envs/manager_based_env_cfg.py
@@ -46,6 +46,21 @@ class ManagerBasedEnvCfg:
     sim: SimulationCfg = SimulationCfg()
     """Physics simulation configuration. Default is SimulationCfg()."""
 
+    device: str | None = None
+    """Device for task computations (e.g., 'cuda:0', 'cpu'). Default is None.
+
+    If None, the device is inferred from the simulation device (:attr:`sim.device`).
+
+    This parameter allows separating the physics simulation device from the device where
+    task buffers and computations occur. For example, you can run physics on GPU
+    (:attr:`sim.device` = 'cuda:0') while keeping task data on CPU (:attr:`device` = 'cpu')
+    by enabling CPU readback (:attr:`sim.enable_cpu_readback` = True).
+
+    Note:
+        When using :attr:`sim.enable_cpu_readback` = True with GPU physics, this should
+        be set to 'cpu' since simulation data will be returned on CPU.
+    """
+
     # ui settings
     ui_window_class_type: type | None = BaseEnvWindow
     """The class type of the UI window. Default is None.
diff --git a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py
index 861072dec0a..81ddbdc501d 100644
--- a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py
+++ b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py
@@ -76,7 +76,16 @@ def __init__(self, cfg: ManagerBasedRLEnvCfg, render_mode: str | None = None, **
         self.common_step_counter = 0
 
         # initialize the episode length buffer BEFORE loading the managers to use it in mdp functions.
-        self.episode_length_buf = torch.zeros(cfg.scene.num_envs, device=cfg.sim.device, dtype=torch.long)
+        # Note: This needs to be on the environment device, not the simulation device
+        # We compute device from cfg directly since self.cfg is not set yet
+        if hasattr(cfg, "device") and cfg.device is not None:
+            device = cfg.device
+        elif cfg.sim.enable_cpu_readback:
+            # If CPU readback is enabled, default to CPU for environment device
+            device = "cpu"
+        else:
+            device = cfg.sim.device
+        self.episode_length_buf = torch.zeros(cfg.scene.num_envs, device=device, dtype=torch.long)
 
         # initialize the base class to setup the scene.
         super().__init__(cfg=cfg)
diff --git a/source/isaaclab/isaaclab/scene/interactive_scene.py b/source/isaaclab/isaaclab/scene/interactive_scene.py
index 15739c33ad7..723bf11f80e 100644
--- a/source/isaaclab/isaaclab/scene/interactive_scene.py
+++ b/source/isaaclab/isaaclab/scene/interactive_scene.py
@@ -107,16 +107,18 @@ class MySceneCfg(InteractiveSceneCfg):
         for more details.
     """
 
-    def __init__(self, cfg: InteractiveSceneCfg):
+    def __init__(self, cfg: InteractiveSceneCfg, device: str | None = None):
         """Initializes the scene.
 
         Args:
             cfg: The configuration class for the scene.
+            device: The device on which scene tensors should be allocated. If None, defaults to simulation device.
         """
         # check that the config is valid
         cfg.validate()
         # store inputs
         self.cfg = cfg
+        self._device = device
         # initialize scene elements
         self._terrain = None
         self._articulations = dict()
@@ -338,6 +340,8 @@ def physics_dt(self) -> float:
     @property
     def device(self) -> str:
         """The device on which the scene is created."""
+        if self._device is not None:
+            return self._device
         return sim_utils.SimulationContext.instance().device  # pyright: ignore [reportOptionalMemberAccess]
 
     @property
diff --git a/source/isaaclab/isaaclab/sim/simulation_cfg.py b/source/isaaclab/isaaclab/sim/simulation_cfg.py
index 380dba26c51..ca10ca26344 100644
--- a/source/isaaclab/isaaclab/sim/simulation_cfg.py
+++ b/source/isaaclab/isaaclab/sim/simulation_cfg.py
@@ -390,6 +390,26 @@ class SimulationCfg:
         running under the hood.
     """
 
+    enable_cpu_readback: bool = False
+    """Enable/disable automatic readback of physics data to CPU. Default is False.
+
+    When set to :obj:`True`, physics simulation data (positions, velocities, etc.) is
+    copied to the CPU, making it readily available on the host. This may be necessary for certain
+    operations that require CPU access to physics data, at the cost of reduced performance.
+
+    When set to :obj:`False` (default), physics data is kept on the GPU and not automatically
+    copied to the CPU. This provides optimal performance when running GPU-accelerated physics by avoiding
+    unnecessary memory transfers.
+
+    Note:
+        This setting is only applicable when :attr:`device` is a CUDA device. When the simulation
+        device is CPU, this flag is ignored as all data is already on the CPU.
+
+        Setting this to :obj:`True` with a CUDA device allows running physics simulation on the GPU
+        while still having CPU access to the data, which can be useful when
+        interfacing with CPU-only code.
+    """
+
     physx: PhysxCfg = PhysxCfg()
     """PhysX solver settings. Default is PhysxCfg()."""
 
diff --git a/source/isaaclab/isaaclab/sim/simulation_context.py b/source/isaaclab/isaaclab/sim/simulation_context.py
index 83277635acf..9082a24e376 100644
--- a/source/isaaclab/isaaclab/sim/simulation_context.py
+++ b/source/isaaclab/isaaclab/sim/simulation_context.py
@@ -297,6 +297,10 @@ def __init__(self, cfg: SimulationCfg | None = None):
                 stage=self._initial_stage,
             )
 
+        # apply cpu readback setting after creating the simulation context
+        # this overrides the default behavior set by omni_isaac_sim's PhysicsContext
+        self._apply_cpu_readback_setting()
+
     """
     Properties - Override.
     """
@@ -649,6 +653,30 @@ def clear_instance(cls):
     Helper Functions
     """
 
+    def _apply_cpu_readback_setting(self):
+        """Applies the CPU readback setting from the configuration.
+
+        This method overrides the default suppress readback behavior set by Isaac Sim
+        based on the user-specified configuration. This allows users to control whether
+        physics data is automatically copied from device (GPU) to host (CPU).
+
+        When enable_cpu_readback is False (default), data is kept on GPU when simulation is set to GPU.
+        When explicitly set to True, data will be returned on CPU even when simulation is set to GPU.
+
+        Note:
+            This setting is only applicable when the simulation device is CUDA. For CPU
+            simulations, this setting is ignored as data is already on CPU.
+        """
+        # Only apply if user has enabled cpu readback AND device is CUDA
+        if self.cfg.enable_cpu_readback and "cuda" in self.cfg.device.lower():
+            # User wants CPU readback enabled, so we override the default behavior
+            # Note: enable_cpu_readback=True means suppressReadback=False
+            set_carb_setting(self.carb_settings, "/physics/suppressReadback", False)
+            omni.log.info(
+                "Physics CPU readback enabled: Data will be automatically copied to host (CPU). "
+                "This may reduce performance but makes data readily available on CPU."
+            )
+
     def _apply_physics_settings(self):
         """Sets various carb physics settings."""
         # enable hydra scene-graph instancing
diff --git a/source/isaaclab/test/assets/test_surface_gripper.py b/source/isaaclab/test/assets/test_surface_gripper.py
index c2f81143f59..e3dc27085b2 100644
--- a/source/isaaclab/test/assets/test_surface_gripper.py
+++ b/source/isaaclab/test/assets/test_surface_gripper.py
@@ -206,8 +206,8 @@ def test_initialization(sim, num_articulations, device, add_ground_plane) -> Non
 @pytest.mark.parametrize("device", ["cuda:0"])
 @pytest.mark.parametrize("add_ground_plane", [True])
 @pytest.mark.isaacsim_ci
-def test_raise_error_if_not_cpu(sim, device, add_ground_plane) -> None:
-    """Test that the SurfaceGripper raises an error if the device is not CPU."""
+def test_raise_error_if_gpu_without_cpu_readback(sim, device, add_ground_plane) -> None:
+    """Test that the SurfaceGripper raises an error if GPU is used without CPU readback enabled."""
     isaac_sim_version = get_version()
     if int(isaac_sim_version[2]) < 5:
         return
@@ -217,9 +217,61 @@ def test_raise_error_if_not_cpu(sim, device, add_ground_plane) -> None:
         surface_gripper_cfg, articulation_cfg, num_articulations, device
     )
 
-    with pytest.raises(Exception):
+    # Should raise ValueError since GPU sim without enable_cpu_readback=True
+    with pytest.raises(ValueError, match="SurfaceGripper requires data on CPU"):
         sim.reset()
 
 
+@pytest.mark.parametrize("num_articulations", [1])
+@pytest.mark.parametrize("device", ["cuda:0"])
+@pytest.mark.parametrize("add_ground_plane", [True])
+@pytest.mark.isaacsim_ci
+def test_gpu_with_cpu_readback(sim, device, add_ground_plane) -> None:
+    """Test that SurfaceGripper works with GPU simulation when CPU readback is enabled.
+
+    This test verifies that:
+    1. GPU simulation with enable_cpu_readback=True works correctly.
+    2. The surface gripper can be initialized on GPU with CPU data.
+    3. The command and state buffers work correctly in this configuration.
+
+    Args:
+        num_articulations: The number of articulations to initialize.
+        device: The device to run the test on.
+        add_ground_plane: Whether to add a ground plane to the simulation.
+    """
+    isaac_sim_version = get_version()
+    if int(isaac_sim_version[2]) < 5:
+        return
+    
+    # Set enable_cpu_readback=True for GPU simulation
+    sim.cfg.enable_cpu_readback = True
+    
+    surface_gripper_cfg, articulation_cfg = generate_surface_gripper_cfgs(kinematic_enabled=False)
+    surface_gripper, articulation, _ = generate_surface_gripper(
+        surface_gripper_cfg, articulation_cfg, num_articulations, device
+    )
+
+    sim.reset()
+
+    assert articulation.is_initialized
+    assert surface_gripper.is_initialized
+
+    # Check that the command and state buffers have the correct shapes
+    assert surface_gripper.command.shape == (num_articulations,)
+    assert surface_gripper.state.shape == (num_articulations,)
+
+    # Check that the command and state are initialized to the correct values
+    assert surface_gripper.command == 0.0  # Idle command after a reset
+    assert surface_gripper.state == -1.0  # Open state after a reset
+
+    # Simulate physics
+    for _ in range(10):
+        # perform rendering
+        sim.step()
+        # update articulation
+        articulation.update(sim.cfg.dt)
+        surface_gripper.update(sim.cfg.dt)
+
+
 if __name__ == "__main__":
     pytest.main([__file__, "-v", "--maxfail=1"])
diff --git a/source/isaaclab/test/sim/test_simulation_context.py b/source/isaaclab/test/sim/test_simulation_context.py
index f0f783463d2..a1cab081d4c 100644
--- a/source/isaaclab/test/sim/test_simulation_context.py
+++ b/source/isaaclab/test/sim/test_simulation_context.py
@@ -146,3 +146,115 @@ def test_zero_gravity():
     gravity_dir, gravity_mag = sim.get_physics_context().get_gravity()
     gravity = np.array(gravity_dir) * gravity_mag
     np.testing.assert_almost_equal(gravity, cfg.gravity)
+
+
+@pytest.mark.isaacsim_ci
+def test_cpu_readback_default_cuda():
+    """Test default behavior with CUDA device (enable_cpu_readback=False)."""
+    import carb
+
+    # Create simulation context with default settings on CUDA
+    cfg = SimulationCfg(device="cuda:0")  # enable_cpu_readback defaults to False
+    sim = SimulationContext(cfg)
+
+    # Check the carb setting - default (False) should not override omni_isaac_sim's behavior
+    # omni_isaac_sim sets suppressReadback=True for CUDA by default
+    carb_settings = carb.settings.get_settings()
+    suppress_readback = carb_settings.get_as_bool("/physics/suppressReadback")
+
+    # With default settings (enable_cpu_readback=False), we don't override, so omni_isaac_sim's
+    # default behavior applies (suppressReadback=True for CUDA)
+    assert suppress_readback is True, "Default CUDA behavior should have suppressReadback=True"
+
+
+@pytest.mark.isaacsim_ci
+def test_cpu_readback_enabled():
+    """Test enabling CPU readback (enable_cpu_readback=True)."""
+    import carb
+
+    # Create simulation context with CPU readback enabled
+    cfg = SimulationCfg(device="cuda:0", enable_cpu_readback=True)
+    sim = SimulationContext(cfg)
+
+    # Check the carb setting - should be suppressReadback=False
+    carb_settings = carb.settings.get_settings()
+    suppress_readback = carb_settings.get_as_bool("/physics/suppressReadback")
+
+    assert suppress_readback is False, "enable_cpu_readback=True should set suppressReadback=False"
+
+
+@pytest.mark.isaacsim_ci
+def test_cpu_readback_disabled():
+    """Test with CPU readback disabled (enable_cpu_readback=False, explicit)."""
+    import carb
+
+    # Create simulation context with CPU readback explicitly disabled
+    cfg = SimulationCfg(device="cuda:0", enable_cpu_readback=False)
+    sim = SimulationContext(cfg)
+
+    # Check the carb setting - should use omni_isaac_sim's default (suppressReadback=True)
+    carb_settings = carb.settings.get_settings()
+    suppress_readback = carb_settings.get_as_bool("/physics/suppressReadback")
+
+    # enable_cpu_readback=False means we don't override, so default applies
+    assert suppress_readback is True, "enable_cpu_readback=False should use default suppressReadback=True"
+
+
+@pytest.mark.isaacsim_ci
+def test_cpu_readback_override():
+    """Test that enable_cpu_readback properly overrides omni_isaac_sim's default behavior."""
+    import carb
+    import isaacsim.core.utils.stage as stage_utils
+
+    # First create with default settings
+    cfg_default = SimulationCfg(device="cuda:0")
+    sim_default = SimulationContext(cfg_default)
+
+    carb_settings = carb.settings.get_settings()
+    default_value = carb_settings.get_as_bool("/physics/suppressReadback")
+
+    # Clean up
+    sim_default.clear_all_callbacks()
+    sim_default.clear_instance()
+
+    # Create stage again
+    stage_utils.create_new_stage()
+
+    # Now create with explicit enable_cpu_readback=True (opposite of default)
+    cfg_override = SimulationCfg(device="cuda:0", enable_cpu_readback=True)
+    sim_override = SimulationContext(cfg_override)
+
+    override_value = carb_settings.get_as_bool("/physics/suppressReadback")
+
+    # The override should be different from default (if default was True, override should be False)
+    # enable_cpu_readback=True -> suppressReadback=False
+    assert override_value is False, "enable_cpu_readback=True should result in suppressReadback=False"
+
+    # If default was True (GPU optimized), then override should be False (CPU readback enabled)
+    if default_value is True:
+        assert override_value is False, "Override successfully changed suppressReadback from True to False"
+
+    # Clean up
+    sim_override.clear_all_callbacks()
+    sim_override.clear_instance()
+
+
+@pytest.mark.isaacsim_ci
+def test_cpu_readback_ignored_on_cpu_device():
+    """Test that enable_cpu_readback is ignored when simulation device is CPU."""
+    import carb
+
+    # Create simulation context with CPU device and enable_cpu_readback=True
+    # This should trigger a warning but not apply any settings
+    cfg = SimulationCfg(device="cpu", enable_cpu_readback=True)
+    sim = SimulationContext(cfg)
+
+    # The flag should be ignored for CPU devices
+    # We can't really check the carb setting as CPU device doesn't use suppressReadback
+    # but we verify that the simulation still initializes successfully
+    assert sim.device == "cpu", "Simulation device should be CPU"
+
+    # Clean up
+    sim.clear_all_callbacks()
+    sim.clear_instance()
+
diff --git a/source/isaaclab_rl/isaaclab_rl/rl_games/rl_games.py b/source/isaaclab_rl/isaaclab_rl/rl_games/rl_games.py
index 8c448c172ac..d60e53bf339 100644
--- a/source/isaaclab_rl/isaaclab_rl/rl_games/rl_games.py
+++ b/source/isaaclab_rl/isaaclab_rl/rl_games/rl_games.py
@@ -319,6 +319,10 @@ def _process_obs(self, obs_dict: VecEnvObs) -> dict[str, torch.Tensor] | dict[st
             - ``"obs"``: either a concatenated tensor (``concate_obs_group=True``) or a Dict of group tensors.
             - ``"states"`` (optional): same structure as above when state groups are configured; omitted otherwise.
         """
+        # move observations to RL device if different from sim device
+        if self._rl_device != self._sim_device:
+            obs_dict = {key: obs.to(device=self._rl_device) for key, obs in obs_dict.items()}
+        
         # clip the observations
         for key, obs in obs_dict.items():
             obs_dict[key] = torch.clamp(obs, -self._clip_obs, self._clip_obs)
diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py
index 5b03a7c639b..0d833b6d59e 100644
--- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py
+++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py
@@ -139,7 +139,15 @@ class RslRlBaseRunnerCfg:
     """The seed for the experiment. Default is 42."""
 
     device: str = "cuda:0"
-    """The device for the rl-agent. Default is cuda:0."""
+    """The device for the rl-agent. Default is cuda:0.
+
+    This is where the RL policy and training computations occur. This can be different
+    from the environment device (where task buffers are) and the simulation device
+    (where physics runs). For example:
+    - sim.device = "cuda:0" (GPU physics)
+    - env.device = "cpu" (task buffers with CPU readback)
+    - rl.device = "cuda:0" (RL training on GPU)
+    """
 
     num_steps_per_env: int = MISSING
     """The number of steps per environment per update."""
diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
index 73ceae04693..74c4bc93ad8 100644
--- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
+++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
@@ -24,7 +24,7 @@ class RslRlVecEnvWrapper(VecEnv):
         https://github.com/leggedrobotics/rsl_rl/blob/master/rsl_rl/env/vec_env.py
     """
 
-    def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | None = None):
+    def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | None = None, rl_device: str | None = None):
         """Initializes the wrapper.
 
         Note:
@@ -33,6 +33,9 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N
         Args:
             env: The environment to wrap around.
             clip_actions: The clipping value for actions. If ``None``, then no clipping is done.
+            rl_device: The device for RL agent/policy. If ``None``, uses the environment device.
+                This allows running the RL agent on a different device than the environment.
+                For example, you can run physics on GPU, have task buffers on CPU, and run RL on GPU.
 
         Raises:
             ValueError: When the environment is not an instance of :class:`ManagerBasedRLEnv` or :class:`DirectRLEnv`.
@@ -48,11 +51,22 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N
         # initialize the wrapper
         self.env = env
         self.clip_actions = clip_actions
+        
+        # store the RL device (where policy/training happens)
+        # this may be different from env.device (where task buffers are)
+        if rl_device is None:
+            self.rl_device = self.unwrapped.device
+        else:
+            self.rl_device = rl_device
 
         # store information required by wrapper
         self.num_envs = self.unwrapped.num_envs
-        self.device = self.unwrapped.device
+        # RSL-RL accesses self.device to know where the policy should be
+        self.device = self.rl_device
         self.max_episode_length = self.unwrapped.max_episode_length
+        
+        # track the environment device separately
+        self.env_device = self.unwrapped.device
 
         # obtain dimensions of the environment
         if hasattr(self.unwrapped, "action_manager"):
@@ -139,6 +153,9 @@ def seed(self, seed: int = -1) -> int:  # noqa: D102
     def reset(self) -> tuple[TensorDict, dict]:  # noqa: D102
         # reset the environment
         obs_dict, extras = self.env.reset()
+        # move observations to RL device if different from env device
+        if self.rl_device != self.env_device:
+            obs_dict = {k: v.to(self.rl_device) if isinstance(v, torch.Tensor) else v for k, v in obs_dict.items()}
         return TensorDict(obs_dict, batch_size=[self.num_envs]), extras
 
     def get_observations(self) -> TensorDict:
@@ -147,14 +164,26 @@ def get_observations(self) -> TensorDict:
             obs_dict = self.unwrapped.observation_manager.compute()
         else:
             obs_dict = self.unwrapped._get_observations()
+        # move observations to RL device if different from env device
+        if self.rl_device != self.env_device:
+            obs_dict = {k: v.to(self.rl_device) if isinstance(v, torch.Tensor) else v for k, v in obs_dict.items()}
         return TensorDict(obs_dict, batch_size=[self.num_envs])
 
     def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.Tensor, dict]:
+        # move actions to env device if coming from different RL device
+        if self.rl_device != self.env_device:
+            actions = actions.to(self.env_device)
         # clip actions
         if self.clip_actions is not None:
             actions = torch.clamp(actions, -self.clip_actions, self.clip_actions)
         # record step information
         obs_dict, rew, terminated, truncated, extras = self.env.step(actions)
+        # move outputs to RL device if different from env device
+        if self.rl_device != self.env_device:
+            obs_dict = {k: v.to(self.rl_device) if isinstance(v, torch.Tensor) else v for k, v in obs_dict.items()}
+            rew = rew.to(self.rl_device)
+            terminated = terminated.to(self.rl_device)
+            truncated = truncated.to(self.rl_device)
         # compute dones for compatibility with RSL-RL
         dones = (terminated | truncated).to(dtype=torch.long)
         # move time out information to the extras dict
diff --git a/source/isaaclab_tasks/test/env_test_utils.py b/source/isaaclab_tasks/test/env_test_utils.py
index 1034fd9ac92..36bc0495f30 100644
--- a/source/isaaclab_tasks/test/env_test_utils.py
+++ b/source/isaaclab_tasks/test/env_test_utils.py
@@ -96,6 +96,7 @@ def _run_environments(
     multi_agent=False,
     create_stage_in_memory=False,
     disable_clone_in_fabric=False,
+    enable_cpu_readback=False,
 ):
     """Run all environments and check environments return valid signals.
 
@@ -107,6 +108,7 @@ def _run_environments(
         multi_agent: Whether the environment is multi-agent.
         create_stage_in_memory: Whether to create stage in memory.
         disable_clone_in_fabric: Whether to disable fabric cloning.
+        enable_cpu_readback: Whether to enable CPU readback for GPU simulations.
     """
 
     # skip test if stage in memory is not supported
@@ -114,8 +116,9 @@ def _run_environments(
     if isaac_sim_version < 5 and create_stage_in_memory:
         pytest.skip("Stage in memory is not supported in this version of Isaac Sim")
 
-    # skip suction gripper environments as they require CPU simulation and cannot be run with GPU simulation
-    if "Suction" in task_name and device != "cpu":
+    # skip suction gripper environments if CPU readback is disabled and device is not CPU
+    # (they were updated to support GPU with CPU readback)
+    if "Suction" in task_name and device != "cpu" and not enable_cpu_readback:
         return
 
     # skip these environments as they cannot be run with 32 environments within reasonable VRAM
@@ -158,6 +161,7 @@ def _run_environments(
         multi_agent=multi_agent,
         create_stage_in_memory=create_stage_in_memory,
         disable_clone_in_fabric=disable_clone_in_fabric,
+        enable_cpu_readback=enable_cpu_readback,
     )
     print(f""">>> Closing environment: {task_name}""")
     print("-" * 80)
@@ -171,6 +175,7 @@ def _check_random_actions(
     multi_agent: bool = False,
     create_stage_in_memory: bool = False,
     disable_clone_in_fabric: bool = False,
+    enable_cpu_readback: bool = False,
 ):
     """Run random actions and check environments return valid signals.
 
@@ -182,6 +187,7 @@ def _check_random_actions(
         multi_agent: Whether the environment is multi-agent.
         create_stage_in_memory: Whether to create stage in memory.
         disable_clone_in_fabric: Whether to disable fabric cloning.
+        enable_cpu_readback: Whether to enable CPU readback for GPU simulations.
     """
     # create a new context stage, if stage in memory is not enabled
     if not create_stage_in_memory:
@@ -196,6 +202,9 @@ def _check_random_actions(
         env_cfg.sim.create_stage_in_memory = create_stage_in_memory
         if disable_clone_in_fabric:
             env_cfg.scene.clone_in_fabric = False
+        # enable CPU readback if requested
+        if enable_cpu_readback:
+            env_cfg.sim.enable_cpu_readback = True
 
         # filter based off multi agents mode and create env
         if multi_agent:
diff --git a/source/isaaclab_tasks/test/test_environments.py b/source/isaaclab_tasks/test/test_environments.py
index 2a0c9d4ea52..6f275ade385 100644
--- a/source/isaaclab_tasks/test/test_environments.py
+++ b/source/isaaclab_tasks/test/test_environments.py
@@ -33,3 +33,16 @@
 def test_environments(task_name, num_envs, device):
     # run environments without stage in memory
     _run_environments(task_name, device, num_envs, create_stage_in_memory=False)
+
+
+@pytest.mark.parametrize("num_envs, device", [(32, "cuda"), (1, "cuda")])
+@pytest.mark.parametrize("task_name", setup_environment(include_play=False, factory_envs=False, multi_agent=False))
+@pytest.mark.isaacsim_ci
+def test_environments_with_cpu_readback(task_name, num_envs, device):
+    """Test environments with CPU readback enabled.
+    
+    This test forces enable_cpu_readback=True for all environments to ensure that
+    the device separation between simulation (GPU) and environment (CPU) works correctly.
+    """
+    # run environments with CPU readback enabled
+    _run_environments(task_name, device, num_envs, create_stage_in_memory=False, enable_cpu_readback=True)