From 0f01472edf0442ce3bca72b9b7bcd9b862a64472 Mon Sep 17 00:00:00 2001 From: DynamoDuan <2030524470@qq.com> Date: Sat, 8 Nov 2025 09:38:17 -0800 Subject: [PATCH 1/2] finish most sim --- openreal2sim/simulation/README.md | 0 openreal2sim/simulation/isaaclab/README.md | 0 .../isaaclab/learning/hybrid_ppo.py | 0 .../isaaclab/learning/simple_ppo.py | 0 openreal2sim/simulation/isaaclab/sim_base.py | 394 +++++++++++- .../simulation/isaaclab/sim_env_factory.py | 75 ++- .../isaaclab/sim_heuristic_manip.py | 472 +++++++++++--- .../isaaclab/sim_preprocess/usd_conversion.py | 10 +- .../isaaclab/sim_randomize_rollout.py | 600 ++++++++++++++++++ .../isaaclab/sim_utils/calibration_utils.py | 0 .../isaaclab/sim_utils/sim_utils.py | 11 +- .../isaaclab/sim_utils/transform_utils.py | 0 .../simulation/isaaclab/task_interface.py | 0 .../simulation/modules/demo_motion_process.py | 392 ++++++++++++ .../simulation/modules/envs/randomzier.py | 348 ++++++++++ .../simulation/modules/envs/task_cfg.py | 86 +++ .../simulation/modules/envs/task_construct.py | 227 +++++++ .../grasp_preprocess}/grasp_generation.py | 18 +- .../grasp_preprocess}/grasp_utils.py | 0 .../hand_preprocess/affordance_extraction.py | 0 .../hand_preprocess/grasp_point_extraction.py | 331 ++++++++++ .../hand_preprocess/hand_extraction.py | 213 +++++++ openreal2sim/simulation/sim_agent.py | 248 ++++++++ .../simulation/utils/compose_config.py | 17 + openreal2sim/simulation/utils/notification.py | 42 ++ 25 files changed, 3375 insertions(+), 109 deletions(-) mode change 100644 => 100755 openreal2sim/simulation/README.md mode change 100644 => 100755 openreal2sim/simulation/isaaclab/README.md mode change 100644 => 100755 openreal2sim/simulation/isaaclab/learning/hybrid_ppo.py mode change 100644 => 100755 openreal2sim/simulation/isaaclab/learning/simple_ppo.py mode change 100644 => 100755 openreal2sim/simulation/isaaclab/sim_base.py mode change 100644 => 100755 openreal2sim/simulation/isaaclab/sim_env_factory.py mode change 100644 => 100755 openreal2sim/simulation/isaaclab/sim_heuristic_manip.py mode change 100644 => 100755 openreal2sim/simulation/isaaclab/sim_preprocess/usd_conversion.py create mode 100755 openreal2sim/simulation/isaaclab/sim_randomize_rollout.py mode change 100644 => 100755 openreal2sim/simulation/isaaclab/sim_utils/calibration_utils.py mode change 100644 => 100755 openreal2sim/simulation/isaaclab/sim_utils/sim_utils.py mode change 100644 => 100755 openreal2sim/simulation/isaaclab/sim_utils/transform_utils.py create mode 100755 openreal2sim/simulation/isaaclab/task_interface.py create mode 100755 openreal2sim/simulation/modules/demo_motion_process.py create mode 100755 openreal2sim/simulation/modules/envs/randomzier.py create mode 100755 openreal2sim/simulation/modules/envs/task_cfg.py create mode 100644 openreal2sim/simulation/modules/envs/task_construct.py rename openreal2sim/simulation/{isaaclab/sim_preprocess => modules/grasp_preprocess}/grasp_generation.py (94%) mode change 100644 => 100755 rename openreal2sim/simulation/{isaaclab/sim_preprocess => modules/grasp_preprocess}/grasp_utils.py (100%) mode change 100644 => 100755 create mode 100755 openreal2sim/simulation/modules/hand_preprocess/affordance_extraction.py create mode 100755 openreal2sim/simulation/modules/hand_preprocess/grasp_point_extraction.py create mode 100755 openreal2sim/simulation/modules/hand_preprocess/hand_extraction.py create mode 100755 openreal2sim/simulation/sim_agent.py create mode 100755 openreal2sim/simulation/utils/compose_config.py create mode 100755 
openreal2sim/simulation/utils/notification.py diff --git a/openreal2sim/simulation/README.md b/openreal2sim/simulation/README.md old mode 100644 new mode 100755 diff --git a/openreal2sim/simulation/isaaclab/README.md b/openreal2sim/simulation/isaaclab/README.md old mode 100644 new mode 100755 diff --git a/openreal2sim/simulation/isaaclab/learning/hybrid_ppo.py b/openreal2sim/simulation/isaaclab/learning/hybrid_ppo.py old mode 100644 new mode 100755 diff --git a/openreal2sim/simulation/isaaclab/learning/simple_ppo.py b/openreal2sim/simulation/isaaclab/learning/simple_ppo.py old mode 100644 new mode 100755 diff --git a/openreal2sim/simulation/isaaclab/sim_base.py b/openreal2sim/simulation/isaaclab/sim_base.py old mode 100644 new mode 100755 index f8f204a..fa8a3cd --- a/openreal2sim/simulation/isaaclab/sim_base.py +++ b/openreal2sim/simulation/isaaclab/sim_base.py @@ -4,13 +4,21 @@ import os import json import random +import shutil from pathlib import Path -from typing import Any, Optional, Dict +from typing import Any, Optional, Dict, Sequence, Tuple from typing import List import numpy as np import torch import imageio +import cv2 +import h5py +import sys +file_path = Path(__file__).resolve() +sys.path.append(str(file_path.parent)) +sys.path.append(str(file_path.parent.parent)) +from modules.envs.task_cfg import CameraInfo, TaskCfg, TrajectoryCfg # Isaac Lab import isaaclab.sim as sim_utils @@ -74,6 +82,10 @@ def __init__( set_physics_props: bool = True, enable_motion_planning: bool = True, debug_level: int = 1, + demo_dir: Optional[Path] = None, + data_dir: Optional[Path] = None, + task_cfg: Optional[TaskCfg] = None, + traj_cfg_list: Optional[List[TrajectoryCfg]] = None, ) -> None: # basic simulation setup self.sim: sim_utils.SimulationContext = sim @@ -86,7 +98,13 @@ def __init__( self.cam_dict = cam_dict self.out_dir: Path = out_dir self.img_folder: str = img_folder - + self.defined_demo_dir: Optional[Path] = demo_dir + if self.defined_demo_dir is not None: + self.defined_demo_dir.mkdir(parents=True, exist_ok=True) + + self.data_dir: Optional[Path] = data_dir + if self.data_dir is not None: + self.data_dir.mkdir(parents=True, exist_ok=True) # scene entities self.robot = scene["robot"] if robot_pose.ndim == 1: @@ -95,9 +113,19 @@ def __init__( assert robot_pose.shape[0] == self.num_envs and robot_pose.shape[1] == 7, \ f"robot_pose must be [B,7], got {robot_pose.shape}" self.robot_pose = robot_pose.to(self.robot.device).contiguous() - - self.object_prim = scene["object_00"] - self.other_object_prims = [scene[key] for key in scene.keys() if f"object_" in key and key != "object_00"] + self.task_cfg = task_cfg + self.traj_cfg_list = traj_cfg_list + # Get object prim based on selected_object_id + # Default to object_00 for backward compatibility + # Note: selected_object_id is set in subclasses after super().__init__() + # So we use a helper method that can be called later + self._selected_object_id = None # Will be set by subclasses + self.object_prim = scene["object_00"] # Default, will be updated if needed + self._update_object_prim() + + # Get all other object prims (excluding the main object) + self.other_object_prims = [scene[key] for key in scene.keys() + if f"object_" in key and key != "object_00"] self.background_prim = scene["background"] self.camera: Camera = scene["camera"] @@ -155,14 +183,17 @@ def __init__( self.save_dict = { "rgb": [], "depth": [], "segmask": [], "joint_pos": [], "joint_vel": [], "actions": [], - "gripper_pos": [], "gripper_cmd": [] + "gripper_pos": [], 
"gripper_cmd": [], + "composed_rgb": [] # composed rgb image with background and foreground } # visualization self.selected_object_id = 0 + self._selected_object_id = 0 self.debug_level = debug_level self.goal_vis_list = [] + if self.debug_level > 0: for b in range(self.num_envs): cfg = VisualizationMarkersCfg( @@ -183,6 +214,23 @@ def __init__( print(f"prepare curobo motion planning: {enable_motion_planning}") self.prepare_curobo() print("curobo motion planning ready.") + + def _update_object_prim(self): + """Update object_prim based on selected_object_id. Called after selected_object_id is set.""" + if self._selected_object_id is None: + return + try: + from sim_env_factory import get_prim_name_from_oid + oid_str = str(self._selected_object_id) + prim_name = get_prim_name_from_oid(oid_str) + if prim_name in self.scene: + self.object_prim = self.scene[prim_name] + # Update other_object_prims + self.other_object_prims = [self.scene[key] for key in self.scene.keys() + if f"object_" in key and key != prim_name] + except (ImportError, ValueError, KeyError) as e: + # Fallback to object_00 if mapping not available + pass # -------- Curobo Motion Planning ---------- def prepare_curobo(self): @@ -445,7 +493,7 @@ def move_to_ik(self, position: torch.Tensor, quaternion: torch.Tensor, steps: in # Compute next joint command joint_pos_des = self.diff_ik_controller.compute(ee_pos_b, ee_quat_b, jacobian, joint_pos) - + # Apply self.apply_actions(joint_pos_des, gripper_open=gripper_open) @@ -521,6 +569,7 @@ def reset(self, env_ids=None): env_origins_robot = env_origins.to(self.robot.device) # (M,3) robot_pose_world = rp_local.clone() robot_pose_world[:, :3] = env_origins_robot + robot_pose_world[:, :3] + #print(f"[INFO] robot_pose_world: {robot_pose_world}") self.robot.write_root_pose_to_sim(robot_pose_world, env_ids=env_ids_t) self.robot.write_root_velocity_to_sim( torch.zeros((M, 6), device=self.robot.device, dtype=torch.float32), env_ids=env_ids_t @@ -615,22 +664,40 @@ def clear_data(self): for key in self.save_dict.keys(): self.save_dict[key] = [] + + def _demo_dir(self) -> Path: - return self.out_dir / self.img_folder / "demos" / f"demo_{self.demo_id}" + if self.defined_demo_dir is not None: + return self.defined_demo_dir + else: + return self.out_dir / self.img_folder / "demos" / f"demo_{self.demo_id}" def _env_dir(self, base: Path, b: int) -> Path: d = base / f"env_{b:03d}" d.mkdir(parents=True, exist_ok=True) return d + + def _get_next_demo_dir(self, base: Path) -> Path: + already_existing_num = len(list(base.iterdir())) + return base / f"demo_{already_existing_num:03d}" - def save_data(self, ignore_keys: List[str] = []): + def save_data(self, ignore_keys: List[str] = [], env_ids: Optional[List[int]] = None): save_root = self._demo_dir() save_root.mkdir(parents=True, exist_ok=True) - + stacked = {k: np.array(v) for k, v in self.save_dict.items()} - - for b in range(self.num_envs): - env_dir = self._env_dir(save_root, b) + if env_ids is None: + env_ids = self._all_env_ids.cpu().numpy() + + composed_rgb = [] + hdf5_names = [] + for b in env_ids: + if self.defined_demo_dir is None: + env_dir = save_root / f"env_{b:03d}" + else: + env_dir = self._get_next_demo_dir(save_root) + hdf5_names.append(env_dir.name) + env_dir.mkdir(parents=True, exist_ok=True) for key, arr in stacked.items(): if key in ignore_keys: # skip the keys for storage continue @@ -657,8 +724,25 @@ def save_data(self, ignore_keys: List[str] = []): writer.append_data(depth_norm[t]) writer.close() np.save(env_dir / f"{key}.npy", 
depth_seq) - else: + elif key != "composed_rgb": + #import pdb; pdb.set_trace() np.save(env_dir / f"{key}.npy", arr[:, b]) + video_path = env_dir / "real_video.mp4" + writer = imageio.get_writer(video_path, fps=50, macro_block_size=None) + rgb = np.array(self.save_dict["rgb"]) + mask = np.array(self.save_dict["segmask"]) + bg_rgb_path = self.task_cfg.bg_rgb_path + self.bg_rgb = imageio.imread(bg_rgb_path) + for t in range(rgb.shape[0]): + #import ipdb; ipdb.set_trace() + composed = self.convert_real(mask[t, b], self.bg_rgb, rgb[t, b]) + writer.append_data(composed) + writer.close() + composed_rgb.append(composed) + + + self.export_batch_data_to_hdf5(hdf5_names) + print("[INFO]: Demonstration is saved at: ", save_root) @@ -676,6 +760,243 @@ def save_data(self, ignore_keys: List[str] = []): os.system(f"cp -r {save_root}/* {demo_save_path}") print("[INFO]: Demonstration is saved at: ", demo_save_path) + # def collect_data_from_folder(self, folder_path: Path): + # """ + # Load `.npy` files saved by `save_data` and repopulate `self.save_dict`. + + # The provided directory may either be: + # • A demo folder containing multiple `env_XXX` subdirectories, or + # • A single `env_XXX` directory. + + # Args: + # folder_path: Path to the data directory. + + # Returns: + # A tuple ``(stacked, env_dirs)`` where ``stacked`` maps data keys to + # stacked numpy arrays of shape `[T, B, ...]` (``T`` timesteps, + # ``B`` environments) and ``env_dirs`` is the ordered list of + # environment directories discovered in ``folder_path``. + # """ + # folder_path = Path(folder_path) + # if not folder_path.exists(): + # raise FileNotFoundError(f"[collect_data_from_folder] path does not exist: {folder_path}") + # if folder_path.is_file(): + # raise NotADirectoryError(f"[collect_data_from_folder] expected a directory, got file: {folder_path}") + + # if folder_path.name.startswith("env_"): + # env_dirs = [folder_path] + # else: + # env_dirs = sorted( + # [p for p in folder_path.iterdir() if p.is_dir() and p.name.startswith("env_")], + # key=lambda p: p.name, + # ) + + # if len(env_dirs) == 0: + # raise ValueError(f"[collect_data_from_folder] no env_XXX directories found in {folder_path}") + + # aggregated: Dict[str, List[np.ndarray]] = {} + # for env_dir in env_dirs: + # for npy_file in sorted(env_dir.glob("*.npy")): + # key = npy_file.stem + # try: + # arr = np.load(npy_file, allow_pickle=False) + # except Exception as exc: + # print(f"[collect_data_from_folder] skip {npy_file}: {exc}") + # continue + # aggregated.setdefault(key, []).append(arr) + + # if len(aggregated) == 0: + # raise ValueError(f"[collect_data_from_folder] no npy data found in {folder_path}") + + # stacked: Dict[str, np.ndarray] = {} + # for key, env_slices in aggregated.items(): + # reference_shape = env_slices[0].shape + # for idx, slice_arr in enumerate(env_slices[1:], start=1): + # if slice_arr.shape != reference_shape: + # raise ValueError( + # f"[collect_data_from_folder] inconsistent shapes for '{key}': " + # f"{reference_shape} vs {slice_arr.shape} (env idx {idx})" + # ) + # stacked[key] = np.stack(env_slices, axis=1) # [T, B, ...] + + # for key in self.save_dict.keys(): + # if key not in stacked: + # self.save_dict[key] = [] + # continue + # data = stacked[key] # [T, B, ...] 
+ # self.save_dict[key] = [data[t] for t in range(data.shape[0])] + + # return stacked, env_dirs + + def _encode_rgb_sequence(self, frames: np.ndarray) -> tuple[list[bytes], int]: + """Encode a sequence of RGB frames into JPEG bytes and return padded bytes.""" + if frames.shape[0] == 0: + return [], 1 + + encoded: List[bytes] = [] + max_len = 1 + for frame in frames: + frame_np = np.asarray(frame) + if frame_np.dtype != np.uint8: + frame_np = np.clip(frame_np, 0, 255).astype(np.uint8) + if frame_np.ndim == 3 and frame_np.shape[2] == 4: + frame_np = frame_np[..., :3] + if frame_np.ndim == 3 and frame_np.shape[2] == 3: + frame_np = cv2.cvtColor(frame_np, cv2.COLOR_RGB2BGR) + success, buffer = cv2.imencode(".jpg", frame_np) + if not success: + raise RuntimeError("Failed to encode RGB frame into JPEG for RoboTwin export.") + data_bytes = buffer.tobytes() + encoded.append(data_bytes) + if len(data_bytes) > max_len: + max_len = len(data_bytes) + + padded = [frame_bytes.ljust(max_len, b"\0") for frame_bytes in encoded] + return padded, max_len + + def export_batch_data_to_hdf5(self, hdf5_names: List[str]) -> int: + """Export buffered trajectories to RoboTwin-style HDF5 episodes.""" + if self.data_dir is not None: + target_root = self.data_dir + else: + target_root = self._demo_dir() / "hdf5" + data_dir = Path(target_root) + data_dir.mkdir(parents=True, exist_ok=True) + + + num_envs = len(hdf5_names) + stacked = {k: np.array(v) for k, v in self.save_dict.items()} + + episode_names = [] + for idx, name in enumerate(hdf5_names): + name = str(name) + episode_names.append(name.replace("demo_", "episode_")) + handled_keys = { + "rgb", + "depth", + "segmask", + "joint_pos", + "joint_vel", + "gripper_pos", + "gripper_cmd", + "actions", + } + + camera_params = self._get_camera_parameters() + + for env_idx, episode_name in enumerate(episode_names): + hdf5_path = data_dir / f"{episode_name}.hdf5" + hdf5_path.parent.mkdir(parents=True, exist_ok=True) + + with h5py.File(hdf5_path, "w") as f: + obs_grp = f.create_group("observation") + camera_group_name = "head_camera" if camera_params is not None else "camera" + cam_grp = obs_grp.create_group(camera_group_name) + if camera_params is not None: + intrinsics, extrinsics, resolution = camera_params + cam_grp.create_dataset("intrinsics", data=intrinsics) + cam_grp.create_dataset("extrinsics", data=extrinsics) + cam_grp.attrs["resolution"] = resolution + + if "rgb" in stacked: + rgb_frames = stacked["rgb"][:, env_idx] + encoded_frames, max_len = self._encode_rgb_sequence(rgb_frames) + dtype = f"S{max_len}" if max_len > 0 else "S1" + cam_grp.create_dataset("rgb", data=np.asarray(encoded_frames, dtype=dtype)) + cam_grp.attrs["encoding"] = "jpeg" + cam_grp.attrs["channels"] = 3 + cam_grp.attrs["original_shape"] = rgb_frames.shape + if camera_params is None: + cam_grp.create_dataset("intrinsics", data=np.zeros((3, 3), dtype=np.float32)) + cam_grp.create_dataset("extrinsics", data=np.zeros((4, 4), dtype=np.float32)) + + if "depth" in stacked: + depth_ds = cam_grp.create_dataset("depth", data=stacked["depth"][:, env_idx]) + depth_ds.attrs["encoding"] = "float32" + depth_ds.attrs["unit"] = "meter" + if "segmask" in stacked: + seg_ds = cam_grp.create_dataset( + "segmentation", + data=stacked["segmask"][:, env_idx].astype(np.uint8), + ) + seg_ds.attrs["encoding"] = "uint8" + seg_ds.attrs["color_mapping"] = "instance_id" + + joint_grp = f.create_group("joint_action") + if "joint_pos" in stacked: + joint_grp.create_dataset( + "joint_pos", data=stacked["joint_pos"][:, 
env_idx].astype(np.float32) + ) + if "joint_vel" in stacked: + joint_grp.create_dataset( + "joint_vel", data=stacked["joint_vel"][:, env_idx].astype(np.float32) + ) + if "gripper_cmd" in stacked: + joint_grp.create_dataset( + "gripper_cmd", data=stacked["gripper_cmd"][:, env_idx].astype(np.float32) + ) + if len(joint_grp.keys()) == 0: + del f["joint_action"] + + if "gripper_pos" in stacked: + endpose_grp = f.create_group("endpose") + endpose_grp.create_dataset( + "gripper_pos", data=stacked["gripper_pos"][:, env_idx].astype(np.float32) + ) + if "gripper_cmd" in stacked: + endpose_grp.create_dataset( + "gripper_cmd", data=stacked["gripper_cmd"][:, env_idx].astype(np.float32) + ) + + if "actions" in stacked: + f.create_dataset( + "actions", data=stacked["actions"][:, env_idx].astype(np.float32) + ) + + extras = {} + for key, value in stacked.items(): + if key in handled_keys: + continue + + extras_grp = f.create_group("extras") + if len(extras) > 0: + for key, value in extras.items(): + extras_grp.create_dataset(key, data=value) + + if self.task_cfg is not None: + + extras_grp.create_dataset("task_desc", data=self.task_cfg.task_desc) + + if self.traj_cfg_list is not None: + traj_i = self.traj_cfg_list[env_idx] + traj_grp = extras_grp.create_group("traj") + traj_grp.create_dataset("robot_pose", data=traj_i.robot_pose) + traj_grp.create_dataset("pregrasp_pose", data=traj_i.pregrasp_pose) + traj_grp.create_dataset("grasp_pose", data=traj_i.grasp_pose) + + + frame_count = stacked["rgb"].shape[0] + meta_grp = f.create_group("meta") + meta_grp.attrs["env_index"] = int(env_idx) + meta_grp.attrs["frame_dt"] = float(self.sim_dt) + meta_grp.attrs["frame_count"] = int(frame_count) + meta_grp.attrs["source"] = "OpenReal2Sim" + meta_grp.attrs["episode_name"] = episode_name + meta_grp.create_dataset("frame_indices", data=np.arange(frame_count, dtype=np.int32)) + + print(f"[INFO]: Exported {num_envs} HDF5 episodes to {data_dir}") + return num_envs + + + + def convert_real(self,segmask, bg_rgb, fg_rgb): + #import pdb; pdb.set_trace() + segmask_2d = segmask[..., 0] + composed = bg_rgb.copy() + composed[segmask_2d] = fg_rgb[segmask_2d] + return composed + + def delete_data(self): save_path = self._demo_dir() failure_root = self.out_dir / self.img_folder / "demos_failures" @@ -686,3 +1007,48 @@ def delete_data(self): for key in self.save_dict.keys(): self.save_dict[key] = [] print("[INFO]: Clear up the folder: ", save_path) + + def _quat_to_rot(self, quat: Sequence[float]) -> np.ndarray: + w, x, y, z = quat + rot = np.array( + [ + [1 - 2 * (y ** 2 + z ** 2), 2 * (x * y - z * w), 2 * (x * z + y * w)], + [2 * (x * y + z * w), 1 - 2 * (x ** 2 + z ** 2), 2 * (y * z - x * w)], + [2 * (x * z - y * w), 2 * (y * z + x * w), 1 - 2 * (x ** 2 + y ** 2)], + ], + dtype=np.float32, + ) + return rot + + def _get_camera_parameters(self) -> Optional[Tuple[np.ndarray, np.ndarray, Tuple[int, int]]]: + if self.task_cfg is None: + return None + camera_info = getattr(self.task_cfg, "camera_info", None) + if camera_info is None: + return None + + intrinsics = np.array( + [ + [camera_info.fx, 0.0, camera_info.cx], + [0.0, camera_info.fy, camera_info.cy], + [0.0, 0.0, 1.0], + ], + dtype=np.float32, + ) + + if getattr(camera_info, "camera_opencv_to_world", None) is not None: + extrinsics = np.array(camera_info.camera_opencv_to_world, dtype=np.float32) + else: + extrinsics = np.eye(4, dtype=np.float32) + if getattr(camera_info, "camera_heading_wxyz", None) is not None: + rot = self._quat_to_rot(camera_info.camera_heading_wxyz) + else: + 
rot = np.eye(3, dtype=np.float32) + extrinsics[:3, :3] = rot + if getattr(camera_info, "camera_position", None) is not None: + extrinsics[:3, 3] = np.array(camera_info.camera_position, dtype=np.float32) + resolution = ( + int(camera_info.width), + int(camera_info.height), + ) + return intrinsics, extrinsics, resolution diff --git a/openreal2sim/simulation/isaaclab/sim_env_factory.py b/openreal2sim/simulation/isaaclab/sim_env_factory.py old mode 100644 new mode 100755 index f3d662e..429e72e --- a/openreal2sim/simulation/isaaclab/sim_env_factory.py +++ b/openreal2sim/simulation/isaaclab/sim_env_factory.py @@ -52,6 +52,8 @@ class SceneCtx: obj_physics: List[Dict] = None use_ground_plane: bool = False ground_z: Optional[float] = None + oid_to_index: Optional[Dict[str, int]] = None # Maps oid (str) to prim index + index_to_oid: Optional[Dict[int, str]] = None # Maps prim index to oid (str) _SCENE_CTX: Optional[SceneCtx] = None @@ -247,7 +249,23 @@ def init_scene_from_scene_dict( If robot pose not provided, sample one with robot_placement_candidates_v2(). """ cam_dict = cfgs["cam_cfg"] - obj_paths = [o["usd"] for o in scene["objects"].values()] + + # Sort objects by oid to ensure consistent ordering + # scene["objects"] keys are oid strings (e.g., "1", "2", ...) + sorted_objects = sorted(scene["objects"].items(), key=lambda x: int(x[0])) + + # Build oid <-> index mapping + oid_to_index = {} + index_to_oid = {} + obj_paths = [] + obj_physics_list = [] + + for index, (oid_str, obj_data) in enumerate(sorted_objects): + oid_to_index[oid_str] = index + index_to_oid[index] = oid_str + obj_paths.append(obj_data["usd"]) + obj_physics_list.append(obj_data.get("physics", None)) + background_path = scene["background"]["usd"] # overwrite physics @@ -255,12 +273,12 @@ def init_scene_from_scene_dict( obj_physics = cfgs["physics_cfg"]["obj_physics"] bg_physics = cfgs["physics_cfg"]["bg_physics"] if obj_physics is None: - obj_physics = [o.get("physics", None) for o in scene["objects"].values()] + obj_physics = obj_physics_list # Use physics from sorted objects elif isinstance(obj_physics, dict): - obj_physics = [obj_physics for _ in scene["objects"].values()] + obj_physics = [obj_physics for _ in range(len(obj_paths))] elif isinstance(obj_physics, list): - assert len(obj_physics) == len(scene["objects"]), \ - "obj_physics must be a list of the same length as scene['objects'] if provided." + assert len(obj_physics) == len(obj_paths), \ + f"obj_physics must be a list of the same length as objects ({len(obj_paths)}), got {len(obj_physics)}." pass else: raise TypeError("obj_physics must be None, a dict, or a list of dicts.") @@ -288,6 +306,8 @@ def init_scene_from_scene_dict( obj_physics=list(obj_physics), use_ground_plane=use_ground_plane, ground_z=ground_z, + oid_to_index=oid_to_index, + index_to_oid=index_to_oid, ) @@ -370,6 +390,51 @@ def load_scene_json(key: str) -> dict: return json.load(open(scene_path)) +def get_prim_name_from_oid(oid: str | int) -> str: + """ + Get the prim name (e.g., "object_00") from object id. + + Args: + oid: Object ID as string (e.g., "1") or integer (e.g., 1) + + Returns: + Prim name string (e.g., "object_00") + """ + assert _SCENE_CTX is not None, "init_scene_from_scene_dict must be called first." + oid_str = str(oid) + if _SCENE_CTX.oid_to_index is None: + raise ValueError("oid_to_index mapping not available. 
Scene may not have been initialized from scene dict.") + index = _SCENE_CTX.oid_to_index.get(oid_str) + if index is None: + raise ValueError(f"OID '{oid_str}' not found in scene. Available OIDs: {list(_SCENE_CTX.oid_to_index.keys())}") + return f"object_{index:02d}" + + +def get_oid_from_prim_name(prim_name: str) -> str: + """ + Get the object id from prim name (e.g., "object_00" -> "1"). + + Args: + prim_name: Prim name string (e.g., "object_00") + + Returns: + OID as string (e.g., "1") + """ + assert _SCENE_CTX is not None, "init_scene_from_scene_dict must be called first." + if not prim_name.startswith("object_"): + raise ValueError(f"Invalid prim name format: {prim_name}") + try: + index = int(prim_name.split("_")[1]) + except (IndexError, ValueError): + raise ValueError(f"Invalid prim name format: {prim_name}") + if _SCENE_CTX.index_to_oid is None: + raise ValueError("index_to_oid mapping not available. Scene may not have been initialized from scene dict.") + oid = _SCENE_CTX.index_to_oid.get(index) + if oid is None: + raise ValueError(f"Index {index} not found in scene. Available indices: {list(_SCENE_CTX.index_to_oid.keys())}") + return oid + + def make_env( cfgs: dict, num_envs: int = 1, diff --git a/openreal2sim/simulation/isaaclab/sim_heuristic_manip.py b/openreal2sim/simulation/isaaclab/sim_heuristic_manip.py old mode 100644 new mode 100755 index a4910a9..8719d69 --- a/openreal2sim/simulation/isaaclab/sim_heuristic_manip.py +++ b/openreal2sim/simulation/isaaclab/sim_heuristic_manip.py @@ -5,27 +5,34 @@ from __future__ import annotations # ─────────── AppLauncher ─────────── -import argparse, os, json, random, transforms3d +import argparse, os, json, random, transforms3d, typing +from typing import Optional from pathlib import Path import numpy as np import torch import yaml from isaaclab.app import AppLauncher +import sys +from modules.envs.task_cfg import TaskCfg, TaskType, SuccessMetric, SuccessMetricType, TrajectoryCfg, RobotType +from modules.envs.task_construct import construct_task_config, add_reference_trajectory, load_task_cfg +file_path = Path(__file__).resolve() +sys.path.append(str(file_path.parent)) +sys.path.append(str(file_path.parent.parent)) # ─────────── CLI ─────────── -parser = argparse.ArgumentParser("sim_policy") -parser.add_argument("--key", type=str, default="demo_video", help="scene key (outputs/)") -parser.add_argument("--robot", type=str, default="franka") -parser.add_argument("--num_envs", type=int, default=1) -parser.add_argument("--num_trials", type=int, default=1) -parser.add_argument("--teleop_device", type=str, default="keyboard") -parser.add_argument("--sensitivity", type=float, default=1.0) -AppLauncher.add_app_launcher_args(parser) -args_cli = parser.parse_args() -args_cli.enable_cameras = True -args_cli.headless = False # headless mode for batch execution -app_launcher = AppLauncher(vars(args_cli)) -simulation_app = app_launcher.app +# parser = argparse.ArgumentParser("sim_policy") +# parser.add_argument("--key", type=str, default="demo_video", help="scene key (outputs/)") +# parser.add_argument("--robot", type=str, default="franka") +# parser.add_argument("--num_envs", type=int, default=1) +# parser.add_argument("--num_trials", type=int, default=1) +# parser.add_argument("--teleop_device", type=str, default="keyboard") +# parser.add_argument("--sensitivity", type=float, default=1.0) +# AppLauncher.add_app_launcher_args(parser) +# args_cli = parser.parse_args() +# args_cli.enable_cameras = True +# args_cli.headless = True # headless mode for 
batch execution +# app_launcher = AppLauncher(vars(args_cli)) +# simulation_app = app_launcher.app # ─────────── Runtime imports ─────────── import isaaclab.sim as sim_utils @@ -37,13 +44,11 @@ # ─────────── Simulation environments ─────────── from sim_base import BaseSimulator, get_next_demo_id from sim_env_factory import make_env -from sim_preprocess.grasp_utils import get_best_grasp_with_hints +from modules.grasp_preprocess.grasp_utils import get_best_grasp_with_hints from sim_utils.transform_utils import pose_to_mat, mat_to_pose, grasp_to_world, grasp_approach_axis_batch from sim_utils.sim_utils import load_sim_parameters -BASE_DIR = Path.cwd() -img_folder = args_cli.key -out_dir = BASE_DIR / "outputs" + # ──────────────────────────── Heuristic Manipulation ──────────────────────────── @@ -54,28 +59,29 @@ class HeuristicManipulation(BaseSimulator): • Every attempt does reset → pre → grasp → close → lift → check; • Early stops when any env succeeds; then re-exec for logging. """ - def __init__(self, sim, scene, sim_cfgs: dict): + def __init__(self, sim, scene, sim_cfgs: dict, args, out_dir: Path, img_folder: str, demo_dir: Path, data_dir: Path): robot_pose = torch.tensor( sim_cfgs["robot_cfg"]["robot_pose"], dtype=torch.float32, device=sim.device ) super().__init__( - sim=sim, scene=scene, args=args_cli, + sim=sim, scene=scene, args=args, robot_pose=robot_pose, cam_dict=sim_cfgs["cam_cfg"], - out_dir=out_dir, img_folder=img_folder, + out_dir=out_dir, img_folder=img_folder, demo_dir = demo_dir, data_dir = data_dir, enable_motion_planning=True, set_physics_props=True, debug_level=0, ) - + self.final_gripper_closed = sim_cfgs["demo_cfg"]["final_gripper_closed"] self.selected_object_id = sim_cfgs["demo_cfg"]["manip_object_id"] - self.traj_key = sim_cfgs["demo_cfg"]["traj_key"] self.traj_path = sim_cfgs["demo_cfg"]["traj_path"] self.goal_offset = [0, 0, sim_cfgs["demo_cfg"]["goal_offset"]] self.grasp_path = sim_cfgs["demo_cfg"]["grasp_path"] self.grasp_idx = sim_cfgs["demo_cfg"]["grasp_idx"] self.grasp_pre = sim_cfgs["demo_cfg"]["grasp_pre"] self.grasp_delta = sim_cfgs["demo_cfg"]["grasp_delta"] + self.task_type = sim_cfgs["demo_cfg"]["task_type"] + self.robot_type = args.robot self.load_obj_goal_traj() def load_obj_goal_traj(self): @@ -91,7 +97,7 @@ def load_obj_goal_traj(self): """ # —— 1) Load Δ_w —— rel = np.load(self.traj_path).astype(np.float32) - self.obj_rel_traj = rel # (T,4,4) + self.obj_rel_traj = rel[1:, :, :] # (T,4,4) self.reset() @@ -108,6 +114,7 @@ def load_obj_goal_traj(self): # Note: here the relative traj Δ_w is defined in world frame with origin (0,0,0), # Hence, we need to normalize it to each env's origin frame. 
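        # Sketch of the composition done in step 3 below (assuming Δ_w is the
        # [T,4,4] relative trajectory loaded from self.traj_path):
        #     T_goal[b, t] ≈ Δ_w[t] @ T_obj_start[b]
        # where T_obj_start[b] is the object's initial pose after subtracting
        # scene.env_origins[b] from its translation, so a single world-frame
        # Δ_w can be reused for every environment.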
origins = self.scene.env_origins.detach().cpu().numpy().astype(np.float32) # (B,3) + obj_state_np[:, :3] -= origins # normalize to env origin frame # —— 3) Precompute absolute object goals for all envs —— @@ -121,7 +128,7 @@ def load_obj_goal_traj(self): obj_goal[b, t] = goal self.obj_goal_traj_w = obj_goal # [B, T, 4, 4] - + def follow_object_goals(self, start_joint_pos, sample_step=1, visualize=True): """ follow precompute object absolute trajectory: self.obj_goal_traj_w: @@ -173,9 +180,103 @@ def follow_object_goals(self, start_joint_pos, sample_step=1, visualize=True): joint_pos, success = self.move_to(ee_pos_b, ee_quat_b, gripper_open=False) self.save_dict["actions"].append(np.concatenate([ee_pos_b.cpu().numpy(), ee_quat_b.cpu().numpy(), np.ones((B, 1))], axis=1)) + success_ids = self.is_success() + joint_pos = self.wait(gripper_open=not self.final_gripper_closed, steps=30) + return joint_pos, success_ids + + def follow_object_relative_goals(self, start_joint_pos, sample_step=1, visualize=True): + B = self.scene.num_envs + obj_goal_all = self.obj_goal_traj_w # [B, T, 4, 4] + T = obj_goal_all.shape[1] + + ee_w = self.robot.data.body_state_w[:, self.robot_entity_cfg.body_ids[0], 0:7] # [B,7] + # obj_w = self.object_prim.data.root_com_state_w[:, :7] # [B,7] + current_ee_pos_w = ee_w[:, :3] + current_ee_quat_w = ee_w[:, 3:7] + + joint_pos = start_joint_pos + root_w = self.robot.data.root_state_w[:, 0:7] # robot base poses per env + + t_iter = list(range(0, T, sample_step)) + t_iter = t_iter + [T-1] if t_iter[-1] != T-1 else t_iter + + for t in t_iter: + goal_pos_list, goal_quat_list = [], [] + print(f"[INFO] follow object goal step {t}/{T}") + for b in range(B): + # (4,4) + current_T_ee = pose_to_mat(current_ee_pos_w[b], current_ee_quat_w[b]) + T_ee_goal = self.obj_rel_traj[t] @ current_T_ee + pos_b, quat_b = mat_to_pose(T_ee_goal) + goal_pos_list.append(pos_b.astype(np.float32)) + goal_quat_list.append(quat_b.astype(np.float32)) + + goal_pos = torch.as_tensor(np.stack(goal_pos_list), dtype=torch.float32, device=self.sim.device) + goal_quat = torch.as_tensor(np.stack(goal_quat_list), dtype=torch.float32, device=self.sim.device) + + if visualize: + self.show_goal(goal_pos, goal_quat) + ee_pos_b, ee_quat_b = subtract_frame_transforms( + root_w[:, :3], root_w[:, 3:7], goal_pos, goal_quat + ) + joint_pos, success = self.move_to(ee_pos_b, ee_quat_b, gripper_open=False) + current_ee_pos_w = self.robot.data.body_state_w[:, self.robot_entity_cfg.body_ids[0], 0:3] + current_ee_quat_w = self.robot.data.body_state_w[:, self.robot_entity_cfg.body_ids[0], 3:7] + + self.save_dict["actions"].append(np.concatenate([ee_pos_b.cpu().numpy(), ee_quat_b.cpu().numpy(), np.ones((B, 1))], axis=1)) + + success_ids = self.is_success() + joint_pos = self.wait(gripper_open=not self.final_gripper_closed, steps=30) + return joint_pos, success_ids + + + def follow_object_centers(self, start_joint_pos, sample_step=1, visualize=True): + B = self.scene.num_envs + obj_goal_all = self.obj_goal_traj_w # [B, T, 4, 4] + T = obj_goal_all.shape[1] + + ee_w = self.robot.data.body_state_w[:, self.robot_entity_cfg.body_ids[0], 0:7] # [B,7] + # obj_w = self.object_prim.data.root_com_state_w[:, :7] # [B,7] + obj_w = self.object_prim.data.root_state_w[:, :7] # [B,7] + + T_ee_in_obj = [] + for b in range(B): + T_ee_w = pose_to_mat(ee_w[b, :3], ee_w[b, 3:7]) + T_obj_w = pose_to_mat(obj_w[b, :3], obj_w[b, 3:7]) + T_ee_in_obj.append((np.linalg.inv(T_obj_w) @ T_ee_w).astype(np.float32)) + + joint_pos = start_joint_pos + root_w = 
self.robot.data.root_state_w[:, 0:7] # robot base poses per env + + t_iter = list(range(0, T, sample_step)) + t_iter = t_iter + [T-1] if t_iter[-1] != T-1 else t_iter + + for t in t_iter: + goal_pos_list, goal_quat_list = [], [] + print(f"[INFO] follow object goal step {t}/{T}") + for b in range(B): + T_obj_goal = obj_goal_all[b, t] # (4,4) + T_ee_goal = T_obj_goal @ T_ee_in_obj[b] # (4,4) + pos_b, quat_b = mat_to_pose(T_ee_goal) + + goal_pos_list.append(pos_b.astype(np.float32)) + goal_quat_list.append(quat_b.astype(np.float32)) + + goal_pos = torch.as_tensor(np.stack(goal_pos_list), dtype=torch.float32, device=self.sim.device) + goal_quat = ee_w[:, 3:7] + + if visualize: + self.show_goal(goal_pos, goal_quat) + ee_pos_b, ee_quat_b = subtract_frame_transforms( + root_w[:, :3], root_w[:, 3:7], goal_pos, goal_quat + ) + joint_pos, success = self.move_to(ee_pos_b, ee_quat_b, gripper_open=False) + self.save_dict["actions"].append(np.concatenate([ee_pos_b.cpu().numpy(), ee_quat_b.cpu().numpy(), np.ones((B, 1))], axis=1)) + is_success = self.is_success() + joint_pos = self.wait(gripper_open=not self.final_gripper_closed, steps=30) + return joint_pos, is_success + - joint_pos = self.wait(gripper_open=True, steps=10) - return joint_pos def viz_object_goals(self, sample_step=1, hold_steps=20): self.reset() @@ -278,7 +379,7 @@ def execute_and_lift_once_batch(self, info: dict, lift_height=0.12) -> tuple[np. score[lifted] = 1000.0 return lifted.detach().cpu().numpy().astype(bool), score.detach().cpu().numpy().astype(np.float32) - def lift_up(self, height=0.12, gripper_open=False): + def lift_up(self, height=0.12, gripper_open=False, steps=8): """ Lift up by a certain height (m) from current EE pose. """ @@ -290,17 +391,16 @@ def lift_up(self, height=0.12, gripper_open=False): p_lift_b, q_lift_b = subtract_frame_transforms( root[:, 0:3], root[:, 3:7], target_p, ee_w[:, 3:7] - ) + ) # [B,3], [B,4] jp, success = self.move_to(p_lift_b, q_lift_b, gripper_open=gripper_open) - jp = self.wait(gripper_open=gripper_open, steps=8) + jp = self.wait(gripper_open=gripper_open, steps=steps) return jp - - def is_success(self) -> bool: + + def is_success(self) -> torch.Tensor: ee_w = self.robot.data.body_state_w[:, self.robot_entity_cfg.body_ids[0], 0:7] obj_w = self.object_prim.data.root_com_pos_w[:, 0:3] - dist = torch.norm(obj_w[:, :3] - ee_w[:, :3], dim=1).mean().item() - print("Success!" if dist < 0.10 else "Fail!") - return dist < 0.10 + dist = torch.norm(obj_w[:, :3] - ee_w[:, :3], dim=1) # [B] + return (dist < 0.15).to(torch.bool) def build_grasp_info( self, @@ -371,7 +471,7 @@ def grasp_trials(self, gg, std: float = 0.0005): self.show_goal(grasp_pos_w_batch, grasp_quat_w_batch) # random disturbance along approach axis pre_dist_batch = np.full((B,), pre_dist_const, dtype=np.float32) - delta_batch = rng.normal(0.0, std, size=(B,)).astype(np.float32) + delta_batch = rng.normal(-0.002, std, size=(B,)).astype(np.float32) info = self.build_grasp_info(grasp_pos_w_batch, grasp_quat_w_batch, pre_dist_batch, delta_batch) @@ -423,7 +523,7 @@ def replay_actions(self, actions: np.ndarray): jp = self.wait(gripper_open=g_b, steps=3) return True - def inference(self, std: float = 0.0) -> bool: + def inference(self, std: float = 0.0) -> list[int]: """ Main function of the heuristic manipulation policy. 
Physical trial-and-error grasping with approach-axis perturbation: @@ -439,14 +539,14 @@ def inference(self, std: float = 0.0) -> bool: npy_path = self.grasp_path if npy_path is None or (not os.path.exists(npy_path)): print(f"[ERR] grasps npy not found: {npy_path}") - return False + return [] gg = GraspGroup().from_npy(npy_file_path=npy_path) gg = get_best_grasp_with_hints(gg, point=None, direction=[0, 0, -1]) # [0, 0, -1] if self.grasp_idx >= 0: if self.grasp_idx >= len(gg): print(f"[ERR] grasp_idx {self.grasp_idx} out of range [0,{len(gg)})") - return False + return [] print(f"[INFO] using fixed grasp index {self.grasp_idx} for all envs.") p_w, q_w = grasp_to_world(gg[int(self.grasp_idx)]) ret = { @@ -460,7 +560,9 @@ def inference(self, std: float = 0.0) -> bool: ret = self.grasp_trials(gg, std=std) print("[INFO] Re-exec all envs with the winning grasp, then follow object goals.") - + if ret is None or ret["success"] == False: + print("[ERR] no proposal succeeded to lift after full search.") + return [] p_win, q_win = ret["chosen_pose_w"] p_all = np.repeat(p_win.reshape(1, 3), B, axis=0) q_all = np.repeat(q_win.reshape(1, 4), B, axis=0) @@ -471,7 +573,7 @@ def inference(self, std: float = 0.0) -> bool: # reset and conduct main process: open→pre→grasp→close→follow_object_goals self.reset() - + #print(self.object_prim.data.root_state_w[:, :7].cpu().numpy()) cam_p = self.camera.data.pos_w cam_q = self.camera.data.quat_w_ros gp_w = torch.as_tensor(info_all["p_w"], dtype=torch.float32, device=self.sim.device) @@ -479,20 +581,23 @@ def inference(self, std: float = 0.0) -> bool: pre_w = torch.as_tensor(info_all["pre_p_w"], dtype=torch.float32, device=self.sim.device) gp_cam, gq_cam = subtract_frame_transforms(cam_p, cam_q, gp_w, gq_w) pre_cam, pre_qcm = subtract_frame_transforms(cam_p, cam_q, pre_w, gq_w) - self.save_dict["grasp_pose_cam"] = torch.cat([gp_cam, gq_cam], dim=1).unsqueeze(0).cpu().numpy() - self.save_dict["pregrasp_pose_cam"] = torch.cat([pre_cam, pre_qcm], dim=1).unsqueeze(0).cpu().numpy() + + self.save_dict["grasp_pose_w"] = torch.cat([gp_w, gq_w], dim=1).cpu().numpy() + self.save_dict["pregrasp_pose_w"] = torch.cat([pre_w, gq_w], dim=1).cpu().numpy() + self.save_dict["grasp_pose_cam"] = torch.cat([gp_cam, gq_cam], dim=1).cpu().numpy() + self.save_dict["pregrasp_pose_cam"] = torch.cat([pre_cam, pre_qcm], dim=1).cpu().numpy() jp = self.robot.data.joint_pos[:, self.robot_entity_cfg.joint_ids] self.wait(gripper_open=True, steps=4) # pre → grasp jp, success = self.move_to(info_all["pre_p_b"], info_all["pre_q_b"], gripper_open=True) - if torch.any(success==False): return False + if torch.any(success==False): return [] self.save_dict["actions"].append(np.concatenate([info_all["pre_p_b"].cpu().numpy(), info_all["pre_q_b"].cpu().numpy(), np.zeros((B, 1))], axis=1)) jp = self.wait(gripper_open=True, steps=3) jp, success = self.move_to(info_all["p_b"], info_all["q_b"], gripper_open=True) - if torch.any(success==False): return False + if torch.any(success==False): return [] self.save_dict["actions"].append(np.concatenate([info_all["p_b"].cpu().numpy(), info_all["q_b"].cpu().numpy(), np.zeros((B, 1))], axis=1)) # close gripper @@ -500,43 +605,244 @@ def inference(self, std: float = 0.0) -> bool: self.save_dict["actions"].append(np.concatenate([info_all["p_b"].cpu().numpy(), info_all["q_b"].cpu().numpy(), np.ones((B, 1))], axis=1)) # object goal following - # self.lift_up(height=0.05, gripper_open=False) - jp = self.follow_object_goals(jp, sample_step=5, visualize=True) + 
self.lift_up(height=self.goal_offset[2], gripper_open=False, steps=8) + #jp = self.follow_object_goals(jp, sample_step=5, visualize=True) + if self.task_type == "simple_pick_place" or self.task_type == "simple_pick": + jp, success_ids = self.follow_object_centers(jp, sample_step=1, visualize=True) + elif self.task_type == "targetted_pick_place": + jp, success_ids = self.follow_object_goals(jp, sample_step=1, visualize=True) + else: + raise ValueError(f"[ERR] Invalid task type: {self.task_type}") + + object_prim_world_pose = self.object_prim.data.root_state_w[:, :7].cpu().numpy() + object_prim_world_pose[:, :3] = object_prim_world_pose[:, :3] - self.scene.env_origins.cpu().numpy() + self.save_dict["final_object_world_pose"] = object_prim_world_pose + + robot_world_pose = self.robot.data.root_state_w[:, :7].cpu().numpy() + robot_world_pose[:, :3] = robot_world_pose[:, :3] - self.scene.env_origins.cpu().numpy() + self.save_dict["final_robot_world_pose"] = robot_world_pose + + + + # Properly handle the case when success_ids is a numpy array + # Convert it to a torch tensor if needed, before calling torch.where + print(f"[INFO] success_ids: {success_ids}") + # If success_ids is already a tensor, we keep as-is + success_ids = torch.where(success_ids)[0].cpu().numpy() + + + return success_ids + + + def from_data_to_task_cfg(self, key:str) -> TaskCfg: + BASE_DIR = Path.cwd() + scene_json_path = BASE_DIR / "outputs" / key / "simulation" / "scene.json" + task_base_folder = BASE_DIR / "tasks" + scene_dict = json.load(open(scene_json_path)) + task_cfg, base_folder = construct_task_config(key, scene_dict, task_base_folder) + robot_pose = self.save_dict["final_robot_world_pose"] + robot_pose = robot_pose[0] + robot_pose = robot_pose.tolist() + + object_trajectory = np.load(self.traj_path).astype(np.float32) + pose_quat_traj = [] + for pose_mat in object_trajectory: + pose, quat = mat_to_pose(pose_mat) + pose_quat = np.concatenate([np.array(pose), np.array(quat)]) + pose_quat_traj.append(pose_quat) + pose_quat_traj = np.array(pose_quat_traj).reshape(-1, 7).tolist() + + # Convert pregrasp and grasp poses from world to env-local frame + pregrasp_pose_world = np.array(self.save_dict["pregrasp_pose_w"]) # [B, 7] + grasp_pose_world = np.array(self.save_dict["grasp_pose_w"]) # [B, 7] + env_origins = self.scene.env_origins.cpu().numpy() # [B, 3] + + pregrasp_pose_local = np.array(pregrasp_pose_world.copy()) + pregrasp_pose_local[:, :3] = pregrasp_pose_world[:, :3] - env_origins + pregrasp_pose = pregrasp_pose_local[0].tolist() # Take first env + + grasp_pose_local = grasp_pose_world.copy() + grasp_pose_local[:, :3] = grasp_pose_world[:, :3] - env_origins + grasp_pose = grasp_pose_local[0].tolist() # Take first env + + final_gripper_close = self.final_gripper_closed + + if task_cfg.task_type == TaskType.TARGETTED_PICK_PLACE: + success_metric = SuccessMetric( + success_metric_type = SuccessMetricType.TARGET_POINT, + end_pose = self.save_dict["final_object_world_pose"][0].tolist(), + final_gripper_close = final_gripper_close, + ) + elif task_cfg.task_type == TaskType.SIMPLE_PICK_PLACE: + final_object_world_pose = self.save_dict["final_object_world_pose"][0] + ground_value = float(final_object_world_pose[2]) + success_metric = SuccessMetric( + success_metric_type = SuccessMetricType.TARGET_PLANE, + ground_value = ground_value, + final_gripper_close = final_gripper_close, + end_pose = self.save_dict["final_object_world_pose"][0].tolist() + ) + else: + success_metric = SuccessMetric( + success_metric_type = 
SuccessMetricType.SIMPLE_LIFT, + lift_height = 0.05, + final_gripper_close = final_gripper_close, + end_pose = self.save_dict["final_object_world_pose"][0].tolist() + ) + object_poses = {} + for obj in task_cfg.objects: + object_poses[obj.object_id] = [0, 0, 0, 1, 0, 0, 0] + + if self.robot_type == 'franka': + robot_type = RobotType.FRANKA + elif self.robot_type == 'ur5': + robot_type = RobotType.UR5 + else: + raise ValueError(f"[ERR] Invalid robot type: {self.robot_type}") + trajectory_cfg = TrajectoryCfg( + robot_pose = robot_pose, + object_poses = object_poses, + object_trajectory = pose_quat_traj, + final_gripper_close = final_gripper_close, + success_metric = success_metric, + pregrasp_pose = pregrasp_pose, + grasp_pose = grasp_pose, + robot_type = robot_type, + ) + add_reference_trajectory(task_cfg, trajectory_cfg, base_folder) + return task_cfg + + +# ─────robot_pose─────────────────────── Entry Point ──────────────────────────── + + + + +# def main(): +# sim_cfgs = load_sim_parameters(BASE_DIR, args_cli.key) +# env, _ = make_env( +# cfgs=sim_cfgs, num_envs=args_cli.num_envs, +# device=args_cli.device, +# bg_simplify=False, +# ) +# sim, scene = env.sim, env.scene + +# my_sim = HeuristicManipulation( +# sim, scene, sim_cfgs=sim_cfgs, +# args=args_cli, out_dir=out_dir, img_folder=img_folder +# ) + +# demo_root = (out_dir / img_folder / "demos").resolve() + +# for _ in range(args_cli.num_trials): + +# robot_pose = torch.tensor(sim_cfgs["robot_cfg"]["robot_pose"], dtype=torch.float32, device=my_sim.sim.device) # [7], pos(3)+quat(wxyz)(4) +# my_sim.set_robot_pose(robot_pose) +# my_sim.demo_id = get_next_demo_id(demo_root) +# my_sim.reset() +# print(f"[INFO] start simulation demo_{my_sim.demo_id}") +# # Note: if you set viz_object_goals(), remember to disable gravity and collision for object +# # my_sim.viz_object_goals(sample_step=10, hold_steps=40) +# success_ids = my_sim.inference() +# print(f"[INFO] success_ids: {success_ids}") +# if success_ids is not None and len(success_ids) > 0: +# success = True +# my_sim.from_data_to_task_cfg(args_cli.key) +# break +# else: +# success = False + +# # actions = np.load("outputs/lab16/demos/demo_5/env_000/actions.npy") +# # my_sim.replay_actions(actions) + +# env.close() +# simulation_app.close() + + + + + +def sim_heuristic_manip(keys: list[str], args_cli: argparse.Namespace, config_path: Optional[str] = None, config_dict: Optional[dict] = None): + """` + Run heuristic manipulation simulation. 
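+    For each key this builds the Isaac Lab scene and runs up to num_trials
+    grasp-and-follow attempts, retrying with a small grasp perturbation
+    (std 0.001) after the first failed trial. The first successful
+    environment's rollout is saved (demos under tasks/<key>/demos, HDF5
+    episodes under data/<key>) and its reference trajectory is registered in
+    the task config via from_data_to_task_cfg.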
+ + Args: + keys: List of scene keys to run (e.g., ["demo_video"]) + args_cli: Command-line arguments (argparse.Namespace) + config_path: Path to config file (yaml/json) - alternative to args_cli + config_dict: Config dictionary - alternative to args_cli + + Usage: + # Option 1: Use command-line args (original) + sim_heuristic_manip(["demo_video"], args_cli) + + # Option 2: Use config file + sim_heuristic_manip(["demo_video"], config_path="config.yaml") + + # Option 3: Use config dict + sim_heuristic_manip(["demo_video"], config_dict={"num_envs": 4, "num_trials": 10}) + """ + # Create args from config if not provided + # if args_cli is None: + # args_cli = create_args_from_config(config_path=config_path, config_dict=config_dict) + BASE_DIR = Path.cwd() + + + out_dir = BASE_DIR / "outputs" + local_out_dir = BASE_DIR / "outputs" + + for key in keys: + args_cli.key = key + local_img_folder = key + demo_dir = BASE_DIR / "tasks" / key / "demos" + data_dir = BASE_DIR / "data" / key + sim_cfgs = load_sim_parameters(BASE_DIR, key) + env, _ = make_env( + cfgs=sim_cfgs, num_envs=args_cli.num_envs, + device=args_cli.device, + bg_simplify=False, + ) + sim, scene = env.sim, env.scene - self.save_data(ignore_keys=["segmask", "depth"]) - return True + my_sim = HeuristicManipulation( + sim, scene, sim_cfgs=sim_cfgs, + args=args_cli, out_dir=local_out_dir, img_folder=local_img_folder, + demo_dir = demo_dir, data_dir = data_dir ) -# ──────────────────────────── Entry Point ──────────────────────────── -def main(): - sim_cfgs = load_sim_parameters(BASE_DIR, args_cli.key) - env, _ = make_env( - cfgs=sim_cfgs, num_envs=args_cli.num_envs, - device=args_cli.device, - bg_simplify=False, - ) - sim, scene = env.sim, env.scene - - my_sim = HeuristicManipulation(sim, scene, sim_cfgs=sim_cfgs) - - demo_root = (out_dir / img_folder / "demos").resolve() - - for _ in range(args_cli.num_trials): - - robot_pose = torch.tensor(sim_cfgs["robot_cfg"]["robot_pose"], dtype=torch.float32, device=my_sim.sim.device) # [7], pos(3)+quat(wxyz)(4) - my_sim.set_robot_pose(robot_pose) - my_sim.demo_id = get_next_demo_id(demo_root) - my_sim.reset() - print(f"[INFO] start simulation demo_{my_sim.demo_id}") - # Note: if you set viz_object_goals(), remember to disable gravity and collision for object - # my_sim.viz_object_goals(sample_step=10, hold_steps=40) - my_sim.inference() - # actions = np.load("outputs/lab16/demos/demo_5/env_000/actions.npy") - # my_sim.replay_actions(actions) - - env.close() - simulation_app.close() - -if __name__ == "__main__": - main() - os.system("quit()") - simulation_app.close() + demo_root = (local_out_dir / key / "demos").resolve() + success = False + for i in range(args_cli.num_trials): + + robot_pose = torch.tensor(sim_cfgs["robot_cfg"]["robot_pose"], dtype=torch.float32, device=my_sim.sim.device) # [7], pos(3)+quat(wxyz)(4) + my_sim.set_robot_pose(robot_pose) + my_sim.demo_id = get_next_demo_id(demo_root) + my_sim.reset() + print(f"[INFO] start simulation demo_{my_sim.demo_id}") + # Note: if you set viz_object_goals(), remember to disable gravity and collision for object + # my_sim.viz_object_goals(sample_step=10, hold_steps=40) + std = 0 + if i > 0: + std = std + 0.001 + success_ids = my_sim.inference(std=std) + print(f"[INFO] success_ids: {success_ids}") + if len(success_ids) > 0: + success = True + task_cfg = my_sim.from_data_to_task_cfg(key) + my_sim.task_cfg = task_cfg + my_sim.trajectory_cfg_list = [task_cfg.reference_trajectory] + my_sim.save_data(ignore_keys=["segmask", "depth"], 
env_ids=success_ids[:1]) + break + # actions = np.load("outputs/lab16/demos/demo_5/env_000/actions.npy") + # my_sim.replay_actions(actions) + if success == False: + print("[ERR] no successful environments!") + env.close() + #simulation_app.close() + return True + + + +#if __name__ == "__main__": + #main() + #os.system("quit()") + #simulation_app.close() diff --git a/openreal2sim/simulation/isaaclab/sim_preprocess/usd_conversion.py b/openreal2sim/simulation/isaaclab/sim_preprocess/usd_conversion.py old mode 100644 new mode 100755 index 6ce5594..1056fcd --- a/openreal2sim/simulation/isaaclab/sim_preprocess/usd_conversion.py +++ b/openreal2sim/simulation/isaaclab/sim_preprocess/usd_conversion.py @@ -381,7 +381,7 @@ def count_rigid_api(usd_path): def run_conversion_for_key(key: str): global args - scene_json = out_dir / key / "scene" / "scene.json" + scene_json = out_dir / key / "simulation" / "scene.json" assert scene_json.exists(), f"[{key}] scene.json not found: {scene_json}" scene_dict = json.load(open(scene_json, "r")) @@ -552,6 +552,14 @@ def run_conversion(scene_dict: dict): log.info(f'Rigid body count: {count_rigid_api(output_path)}') log.info(f"Saved USD file to {os.path.abspath(output_path)}") + +def usd_conversion(keys): + for key in keys: + log.info(f"\n========== [USD Convert] key: {key} ==========") + run_conversion_for_key(key) + simulation_app.close() + print('[Info] USD conversion completed.') + ######################################################## ## Batch entry ######################################################## diff --git a/openreal2sim/simulation/isaaclab/sim_randomize_rollout.py b/openreal2sim/simulation/isaaclab/sim_randomize_rollout.py new file mode 100755 index 0000000..530995a --- /dev/null +++ b/openreal2sim/simulation/isaaclab/sim_randomize_rollout.py @@ -0,0 +1,600 @@ +""" +Heuristic manipulation policy in Isaac Lab simulation. +Using grasping and motion planning to perform object manipulation tasks. 
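+This variant rolls out recorded reference trajectories: per-env initial object
+and robot poses come from a list of TrajectoryCfg objects (see
+compute_poses_from_traj_cfg and RandomizeExecution.reset).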
+""" +from __future__ import annotations + +# ─────────── AppLauncher ─────────── +import argparse, os, json, random, transforms3d +from pathlib import Path +import numpy as np +import torch +import yaml +import sys +from isaaclab.app import AppLauncher +from typing import Optional, List +from modules.envs.task_cfg import TaskCfg, TaskType, SuccessMetric, SuccessMetricType, TrajectoryCfg +from modules.envs.task_construct import construct_task_config, add_reference_trajectory, load_task_cfg +from modules.envs.randomizer import Randomizer +file_path = Path(__file__).resolve() +import imageio + +sys.path.append(str(file_path.parent)) +sys.path.append(str(file_path.parent.parent)) + +# ─────────── CLI ─────────── +# parser = argparse.ArgumentParser("sim_policy") +# parser.add_argument("--key", type=str, default="demo_video", help="scene key (outputs/)") +# parser.add_argument("--robot", type=str, default="franka") +# parser.add_argument("--num_envs", type=int, default=1) +# parser.add_argument("--num_trials", type=int, default=1) +# parser.add_argument("--teleop_device", type=str, default="keyboard") +# parser.add_argument("--sensitivity", type=float, default=1.0) +# AppLauncher.add_app_launcher_args(parser) +# args_cli = parser.parse_args() +# args_cli.enable_cameras = True +# args_cli.headless = False # headless mode for batch execution +# app_launcher = AppLauncher(vars(args_cli)) +# simulation_app = app_launcher.app + +# ─────────── Runtime imports ─────────── +import isaaclab.sim as sim_utils +from isaaclab.utils.math import subtract_frame_transforms + +from graspnetAPI.grasp import GraspGroup + + +# ─────────── Simulation environments ─────────── +from sim_base import BaseSimulator, get_next_demo_id +from sim_env_factory import make_env +from modules.grasp_preprocess.grasp_utils import get_best_grasp_with_hints +from isaaclab.sim_utils.transform_utils import pose_to_mat, mat_to_pose, grasp_to_world, grasp_approach_axis_batch +from isaaclab.sim_utils.sim_utils import load_sim_parameters + +BASE_DIR = Path.cwd() + +out_dir = BASE_DIR / "outputs" + +def pose_distance(T1, T2): + """ + Compute translation and rotation (angle) distance between two SE(3) transformation matrices in torch. + Args: + T1, T2: [..., 4, 4] torch.Tensor or np.ndarray, can be batched + Returns: + trans_dist: translation distance(s) + angle: rotational angle(s) + """ + if not torch.is_tensor(T1): + T1 = torch.tensor(T1, dtype=torch.float32) + if not torch.is_tensor(T2): + T2 = torch.tensor(T2, dtype=torch.float32) + + # Translation distance + t1 = T1[..., :3, 3] + t2 = T2[..., :3, 3] + trans_dist = torch.norm(t2 - t1, dim=-1) + + # Rotation difference (angle) + R1 = T1[..., :3, :3] + R2 = T2[..., :3, :3] + dR = torch.matmul(R2, R1.transpose(-2, -1)) + trace = dR[..., 0, 0] + dR[..., 1, 1] + dR[..., 2, 2] + cos_angle = (trace - 1) / 2 + + # Handle numerical precision + cos_angle = torch.clamp(cos_angle, -1.0, 1.0) + angle = torch.acos(cos_angle) + return trans_dist, angle + +def compute_poses_from_traj_cfg(traj_cfg_list): + """ + Extract poses and trajectories from a list of TrajectoryCfg objects. 
+ + Args: + traj_cfg_list: List of TrajectoryCfg objects + + Returns: + robot_poses_list: List of robot poses [7] for each trajectory + object_poses_dict: Dict mapping oid -> list of (pos, quat) tuples + object_trajectory_list: List of object trajectories + final_gripper_state_list: List of final gripper states + grasping_phase_list: List of grasping phases + placing_phrase_list: List of placing phases + """ + robot_poses_list = [] + object_poses_dict = {} # {oid: [(pos, quat), ...]} + object_trajectory_list = [] + final_gripper_state_list = [] + pregrasp_pose_list = [] + grasp_pose_list = [] + final_gripper_close_list = [] + end_pose_list = [] + + + for traj_cfg in traj_cfg_list: + robot_poses_list.append(traj_cfg.robot_pose) + + # Extract object poses: traj_cfg.object_poses is a list of (oid, pose) tuples + for oid in traj_cfg.object_poses.keys(): + pose = traj_cfg.object_poses[oid] + oid_str = str(oid) + if oid_str not in object_poses_dict: + object_poses_dict[oid_str] = [] + object_poses_dict[oid_str].append(np.array(pose, dtype=np.float32)) + traj = [] + for i in range(len(traj_cfg.object_trajectory)): + mat = pose_to_mat(traj_cfg.object_trajectory[i][:3], traj_cfg.object_trajectory[i][3:7]) + traj.append(mat) + object_trajectory_list.append(np.array(traj, dtype=np.float32)) + final_gripper_state_list.append(traj_cfg.final_gripper_close) + pregrasp_pose_list.append(np.array(traj_cfg.pregrasp_pose, dtype=np.float32)) + grasp_pose_list.append(np.array(traj_cfg.grasp_pose, dtype=np.float32)) + final_gripper_close_list.append(traj_cfg.final_gripper_close) + end_pose_list.append(np.array(traj_cfg.success_metric.end_pose, dtype=np.float32)) + + return robot_poses_list, object_poses_dict, object_trajectory_list, final_gripper_state_list, pregrasp_pose_list, grasp_pose_list, end_pose_list + + + +# ────────────────────────────Heuristic Manipulation ──────────────────────────── +class RandomizeExecution(BaseSimulator): + """ + Physical trial-and-error grasping with approach-axis perturbation: + • Multiple grasp proposals executed in parallel; + • Every attempt does reset → pre → grasp → close → lift → check; + • Early stops when any env succeeds; then re-exec for logging. 
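+    • In this rollout variant the pre-grasp pose, grasp pose and object
+      trajectory are taken from the recorded TrajectoryCfg
+      (see compute_poses_from_traj_cfg) rather than searched for online.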
+ """ + def __init__(self, sim, scene, sim_cfgs: dict, demo_dir: Path, data_dir: Path, record: bool = True, args_cli: Optional[argparse.Namespace] = None, bg_rgb: Optional[np.ndarray] = None): + robot_pose = torch.tensor( + sim_cfgs["robot_cfg"]["robot_pose"], + dtype=torch.float32, + device=sim.device, + + ) + super().__init__( + sim=sim, scene=scene, args=args_cli, + robot_pose=robot_pose, cam_dict=sim_cfgs["cam_cfg"], + out_dir=out_dir, img_folder=args_cli.key, data_dir=data_dir, + demo_dir=demo_dir, + enable_motion_planning=True, + set_physics_props=True, debug_level=0, + ) + + self.selected_object_id = sim_cfgs["demo_cfg"]["manip_object_id"] + self._selected_object_id = str(self.selected_object_id) # Store as string for mapping + self._update_object_prim() # Update object_prim based on selected_object_id + self.record = record # Store whether to record data + #self.traj_cfg_list = traj_cfg_list + + self.task_type = sim_cfgs["demo_cfg"]["task_type"] + self.goal_offset = [0, 0, sim_cfgs["demo_cfg"]["goal_offset"]] + + + + + def reset(self, env_ids=None): + super().reset(env_ids) + device = self.object_prim.device + if env_ids is None: + env_ids_t = self._all_env_ids.to(device) # (B,) + else: + env_ids_t = torch.as_tensor(env_ids, device=device, dtype=torch.long).view(-1) # (M,) + M = int(env_ids_t.shape[0]) + + # --- object pose/vel: set object at env origins with identity quat --- + env_origins = self.scene.env_origins.to(device)[env_ids_t] # (M,3) + + # Set poses for all objects from object_poses_dict + from sim_env_factory import get_prim_name_from_oid + for oid in self.object_poses_dict.keys(): + # Get prim name from oid + prim_name = get_prim_name_from_oid(str(oid)) + + object_prim = self.scene[prim_name] + + # Get pose for this object (first pose in the list for now) + # object_poses_dict[oid] is a list of (pos, quat) tuples from mat_to_pose + if len(self.object_poses_dict[oid]) == 0: + continue + #import ipdb; ipdb.set_trace() + pos, quat = np.array(self.object_poses_dict[oid], dtype = np.float32)[env_ids_t.cpu().numpy(), :3], np.array(self.object_poses_dict[oid], dtype = np.float32)[env_ids_t.cpu().numpy(), 3:7] + object_pose = torch.zeros((M, 7), device=device, dtype=torch.float32) + object_pose[:, :3] = env_origins + torch.tensor(pos, dtype=torch.float32, device=device) + object_pose[:, 3:7] = torch.tensor(quat, dtype=torch.float32, device=device) # wxyz + + object_prim.write_root_pose_to_sim(object_pose, env_ids=env_ids_t) + object_prim.write_root_velocity_to_sim( + torch.zeros((M, 6), device=device, dtype=torch.float32), env_ids=env_ids_t + ) + + object_prim.write_data_to_sim() + + rp_local = np.array(self.robot_poses_list, dtype=np.float32) + env_origins_robot = self.scene.env_origins.to(device)[env_ids_t] + import copy + robot_pose_world = copy.deepcopy(rp_local) + robot_pose_world[:, :3] = env_origins_robot.cpu().numpy() + robot_pose_world[env_ids_t.cpu().numpy(), :3] + #robot_pose_world[:, 3:7] = [1.0, 0.0, 0.0, 0.0] + self.robot.write_root_pose_to_sim(torch.tensor(robot_pose_world, dtype=torch.float32, device=device), env_ids=env_ids_t) + self.robot.write_root_velocity_to_sim( + torch.zeros((M, 6), device=device, dtype=torch.float32), env_ids=env_ids_t + ) + + + joint_pos = self.robot.data.default_joint_pos.to(self.robot.device)[env_ids_t] # (M,7) + joint_vel = self.robot.data.default_joint_vel.to(self.robot.device)[env_ids_t] # (M,7) + self.robot.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids_t) + self.robot.write_data_to_sim() + + self.clear_data() + + 
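+    # Frame-convention note with an illustrative example (numbers made up): every pose stored in a
+    # TrajectoryCfg is expressed in the per-env local frame, so reset() above adds
+    # scene.env_origins before writing to the sim. For instance, with env_origins[b] = (2.0, 0.0, 0.0)
+    # and a local object pose (0.3, 0.1, 0.02, 1, 0, 0, 0), the object is spawned at world
+    # position (2.3, 0.1, 0.02); is_success() below subtracts the same origin again before
+    # comparing against success_metric.end_pose, roughly as in:
+    #   obj_w[:, :3] -= self.scene.env_origins                    # world -> env-local
+    #   trans_dist, angle = pose_distance(obj_pose_l, goal_pose_l)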
def compute_components(self): + self.robot_poses_list, self.object_poses_dict, self.object_trajectory_list, self.final_gripper_state_list, self.pregrasp_pose_list, self.grasp_pose_list, self.end_pose_list = compute_poses_from_traj_cfg(self.traj_cfg_list) + + + def compute_object_goal_traj(self): + B = self.scene.num_envs + # obj_state = self.object_prim.data.root_com_state_w[:, :7] # [B,7], pos(3)+quat(wxyz)(4) + obj_state = self.object_prim.data.root_state_w[:, :7] # [B,7], pos(3)+quat(wxyz)(4) + self.show_goal(obj_state[:, :3], obj_state[:, 3:7]) + + obj_state_np = obj_state.detach().cpu().numpy().astype(np.float32) + offset_np = np.asarray(self.goal_offset, dtype=np.float32) + obj_state_np[:, :3] += offset_np # raise a bit to avoid collision + + # Note: here the relative traj Δ_w is defined in world frame with origin (0,0,0), + # Hence, we need to normalize it to each env's origin frame. + origins = self.scene.env_origins.detach().cpu().numpy().astype(np.float32) # (B,3) + obj_state_np[:, :3] -= origins # normalize to env origin frame + + # —— 3) Precompute absolute object goals for all envs —— + T = self.object_trajectory_list[0].shape[0] + obj_goal = np.zeros((B, T, 4, 4), dtype=np.float32) + for b in range(B): + T_init = pose_to_mat(obj_state_np[b, :3], obj_state_np[b, 3:7]) # (4,4) + for t in range(1, T): + goal = self.object_trajectory_list[b][t] @ T_init + goal[:3, 3] += origins[b] # back to world frame + obj_goal[b, t] = goal + + self.obj_goal_traj_w = obj_goal + + def follow_object_goals(self, start_joint_pos, sample_step=1, visualize=True): + """ + follow precompute object absolute trajectory: self.obj_goal_traj_w: + T_obj_goal[t] = Δ_w[t] @ T_obj_init + + EE-object transform is fixed: + T_ee_goal[t] = T_obj_goal[t] @ (T_obj_grasp^{-1} @ T_ee_grasp) + Here T_obj_grasp / T_ee_grasp is the transform at the moment of grasping. 
+ """ + + B = self.scene.num_envs + obj_goal_all = self.obj_goal_traj_w # [B, T, 4, 4] + T = obj_goal_all.shape[1] + + ee_w = self.robot.data.body_state_w[:, self.robot_entity_cfg.body_ids[0], 0:7] # [B,7] + # obj_w = self.object_prim.data.root_com_state_w[:, :7] # [B,7] + obj_w = self.object_prim.data.root_state_w[:, :7] # [B,7] + + T_ee_in_obj = [] + for b in range(B): + T_ee_w = pose_to_mat(ee_w[b, :3], ee_w[b, 3:7]) + T_obj_w = pose_to_mat(obj_w[b, :3], obj_w[b, 3:7]) + T_ee_in_obj.append((np.linalg.inv(T_obj_w) @ T_ee_w).astype(np.float32)) + + joint_pos = start_joint_pos + root_w = self.robot.data.root_state_w[:, 0:7] # robot base poses per env + + t_iter = list(range(0, T, sample_step)) + t_iter = t_iter + [T-1] if t_iter[-1] != T-1 else t_iter + + for t in t_iter: + goal_pos_list, goal_quat_list = [], [] + print(f"[INFO] follow object goal step {t}/{T}") + for b in range(B): + T_obj_goal = obj_goal_all[b, t] # (4,4) + T_ee_goal = T_obj_goal @ T_ee_in_obj[b] # (4,4) + pos_b, quat_b = mat_to_pose(T_ee_goal) + goal_pos_list.append(pos_b.astype(np.float32)) + goal_quat_list.append(quat_b.astype(np.float32)) + + goal_pos = torch.as_tensor(np.stack(goal_pos_list), dtype=torch.float32, device=self.sim.device) + goal_quat = torch.as_tensor(np.stack(goal_quat_list), dtype=torch.float32, device=self.sim.device) + + if visualize: + self.show_goal(goal_pos, goal_quat) + ee_pos_b, ee_quat_b = subtract_frame_transforms( + root_w[:, :3], root_w[:, 3:7], goal_pos, goal_quat + ) + joint_pos, success = self.move_to(ee_pos_b, ee_quat_b, gripper_open=False, record = self.record) + self.save_dict["actions"].append(np.concatenate([ee_pos_b.cpu().numpy(), ee_quat_b.cpu().numpy(), np.ones((B, 1))], axis=1)) + + is_grasp_success = self.is_grasp_success() + for b in range(B): + if self.final_gripper_state_list[b]: + self.wait(gripper_open=False, steps=10, record = self.record) + else: + self.wait(gripper_open=True, steps=10, record = self.record) + + return joint_pos, is_grasp_success + + + def follow_object_centers(self, start_joint_pos, sample_step=1, visualize=True): + B = self.scene.num_envs + obj_goal_all = self.obj_goal_traj_w # [B, T, 4, 4] + T = obj_goal_all.shape[1] + + ee_w = self.robot.data.body_state_w[:, self.robot_entity_cfg.body_ids[0], 0:7] # [B,7] + # obj_w = self.object_prim.data.root_com_state_w[:, :7] # [B,7] + obj_w = self.object_prim.data.root_state_w[:, :7] # [B,7] + + T_ee_in_obj = [] + for b in range(B): + T_ee_w = pose_to_mat(ee_w[b, :3], ee_w[b, 3:7]) + T_obj_w = pose_to_mat(obj_w[b, :3], obj_w[b, 3:7]) + T_ee_in_obj.append((np.linalg.inv(T_obj_w) @ T_ee_w).astype(np.float32)) + + joint_pos = start_joint_pos + root_w = self.robot.data.root_state_w[:, 0:7] # robot base poses per env + + t_iter = list(range(0, T, sample_step)) + t_iter = t_iter + [T-1] if t_iter[-1] != T-1 else t_iter + + for t in t_iter: + goal_pos_list, goal_quat_list = [], [] + print(f"[INFO] follow object goal step {t}/{T}") + for b in range(B): + T_obj_goal = obj_goal_all[b, t] # (4,4) + T_ee_goal = T_obj_goal @ T_ee_in_obj[b] # (4,4) + pos_b, quat_b = mat_to_pose(T_ee_goal) + + goal_pos_list.append(pos_b.astype(np.float32)) + goal_quat_list.append(quat_b.astype(np.float32)) + + goal_pos = torch.as_tensor(np.stack(goal_pos_list), dtype=torch.float32, device=self.sim.device) + goal_quat = ee_w[:, 3:7] + + if visualize: + self.show_goal(goal_pos, goal_quat) + ee_pos_b, ee_quat_b = subtract_frame_transforms( + root_w[:, :3], root_w[:, 3:7], goal_pos, goal_quat + ) + joint_pos, success = self.move_to(ee_pos_b, 
ee_quat_b, gripper_open=False, record = self.record)
+            self.save_dict["actions"].append(np.concatenate([ee_pos_b.cpu().numpy(), ee_quat_b.cpu().numpy(), np.ones((B, 1))], axis=1))
+        is_grasp_success = self.is_grasp_success()
+        for b in range(B):
+            if self.final_gripper_state_list[b]:
+                self.wait(gripper_open=False, steps=10, record = self.record)
+            else:
+                self.wait(gripper_open=True, steps=10, record = self.record)
+
+        return joint_pos, is_grasp_success
+
+
+    def is_success(self):
+        obj_w = self.object_prim.data.root_state_w[:, :7]
+        origins = self.scene.env_origins
+        obj_w[:, :3] -= origins
+        obj_pose_l = pose_to_mat(obj_w[:, :3], obj_w[:, 3:7])
+        goal_pose_l = np.array(self.end_pose_list, dtype=np.float32)
+        goal_pose_l = pose_to_mat(goal_pose_l[:, :3], goal_pose_l[:, 3:7])
+        trans_dist, angle = pose_distance(obj_pose_l, goal_pose_l)
+        print(f"[INFO] trans_dist: {trans_dist}, angle: {angle}")
+        if self.task_type == "simple_pick_place" or self.task_type == "simple_pick":
+            is_success = trans_dist < 0.10
+        elif self.task_type == "targetted_pick_place":
+            is_success = (trans_dist < 0.10) & (angle < np.radians(10))
+        else:
+            raise ValueError(f"[ERR] Invalid task type: {self.task_type}")
+        return is_success.cpu().numpy()
+
+    def is_grasp_success(self):
+        ee_w = self.robot.data.body_state_w[:, self.robot_entity_cfg.body_ids[0], 0:7]
+        obj_w = self.object_prim.data.root_com_pos_w[:, 0:3]
+        dist = torch.norm(obj_w[:, :3] - ee_w[:, :3], dim=1)  # [B]
+        print(f"[INFO] dist: {dist}")
+        return (dist < 0.15).cpu().numpy()
+
+
+    def viz_object_goals(self, sample_step=1, hold_steps=20):
+        self.reset()
+        self.wait(gripper_open=True, steps=10, record = False)
+        B = self.scene.num_envs
+        env_ids = torch.arange(B, device=self.object_prim.device, dtype=torch.long)
+        goals = self.obj_goal_traj_w
+        t_iter = list(range(0, goals.shape[1], sample_step))
+        t_iter = t_iter + [goals.shape[1]-1] if t_iter[-1] != goals.shape[1]-1 else t_iter
+        t_iter = t_iter[-1:]
+        for t in t_iter:
+            print(f"[INFO] viz object goal step {t}/{goals.shape[1]}")
+            pos_list, quat_list = [], []
+            for b in range(B):
+                pos, quat = mat_to_pose(goals[b, t])
+                pos_list.append(pos.astype(np.float32))
+                quat_list.append(quat.astype(np.float32))
+            pose = self.object_prim.data.root_state_w[:, :7]
+            # pose = self.object_prim.data.root_com_state_w[:, :7]
+            pose[:, :3] = torch.tensor(np.stack(pos_list), dtype=torch.float32, device=pose.device)
+            pose[:, 3:7] = torch.tensor(np.stack(quat_list), dtype=torch.float32, device=pose.device)
+            self.show_goal(pose[:, :3], pose[:, 3:7])
+
+            for _ in range(hold_steps):
+                self.object_prim.write_root_pose_to_sim(pose, env_ids=env_ids)
+                self.object_prim.write_data_to_sim()
+                self.step()
+
+    # ---------- Helpers ----------
+    def _to_base(self, pos_w: np.ndarray | torch.Tensor, quat_w: np.ndarray | torch.Tensor):
+        """World → robot base frame for all envs."""
+        root = self.robot.data.root_state_w[:, 0:7]  # [B,7]
+        p_w, q_w = self._ensure_batch_pose(pos_w, quat_w)
+        pb, qb = subtract_frame_transforms(
+            root[:, 0:3], root[:, 3:7], p_w, q_w
+        )
+        return pb, qb  # [B,3], [B,4]
+
+    # ---------- Batched execution & lift-check ----------
+    def build_grasp_info(
+        self,
+        grasp_pos_w_batch: np.ndarray,   # (B,3) GraspNet proposal in world frame
+        grasp_quat_w_batch: np.ndarray,  # (B,4) wxyz
+        pregrasp_pos_w_batch: np.ndarray,
+        pregrasp_quat_w_batch: np.ndarray,
+
+    ) -> dict:
+        """
+        Returns the same structure as _build_info, but each env's grasp can be different.
+        """
+        B = self.scene.num_envs
+        p_w = np.asarray(grasp_pos_w_batch, dtype=np.float32).reshape(B, 3)
+
q_w = np.asarray(grasp_quat_w_batch, dtype=np.float32).reshape(B, 4) + pre_p_w = np.asarray(pregrasp_pos_w_batch, dtype=np.float32).reshape(B, 3) + pre_q_w = np.asarray(pregrasp_quat_w_batch, dtype=np.float32).reshape(B, 4) + + + origins = self.scene.env_origins.detach().cpu().numpy().astype(np.float32) # (B,3) + pre_p_w = pre_p_w + origins + p_w = p_w + origins + + pre_pb, pre_qb = self._to_base(pre_p_w, pre_q_w) + pb, qb = self._to_base(p_w, q_w) + + return { + "pre_p_w": pre_p_w, "p_w": p_w, "q_w": q_w, + "pre_p_b": pre_pb, "pre_q_b": pre_qb, + "p_b": pb, "q_b": qb, + } + + + def inference(self, std: float = 0.0) -> list[int]: + """ + Main function of the heuristic manipulation policy. + Physical trial-and-error grasping with approach-axis perturbation: + • Multiple grasp proposals executed in parallel; + • Every attempt does reset → pre → grasp → close → lift → check; + • Early stops when any env succeeds; then re-exec for logging. + """ + B = self.scene.num_envs + + self.wait(gripper_open=True, steps=10, record = self.record) + + + # reset and conduct main process: open→pre→grasp→close→follow_object_goals + self.reset() + #self.viz_object_goals() + + + cam_p = self.camera.data.pos_w + cam_q = self.camera.data.quat_w_ros + gp_w = torch.as_tensor(np.array(self.grasp_pose_list, dtype=np.float32)[:,:3], dtype=torch.float32, device=self.sim.device) + gq_w = torch.as_tensor(np.array(self.grasp_pose_list, dtype=np.float32)[:,3:7], dtype=torch.float32, device=self.sim.device) + pre_w = torch.as_tensor(np.array(self.pregrasp_pose_list, dtype=np.float32)[:,:3], dtype=torch.float32, device=self.sim.device) + gp_cam, gq_cam = subtract_frame_transforms(cam_p, cam_q, gp_w, gq_w) + pre_cam, pre_qcm = subtract_frame_transforms(cam_p, cam_q, pre_w, gq_w) + self.save_dict["grasp_pose_cam"] = torch.cat([gp_cam, gq_cam], dim=1).unsqueeze(0).cpu().numpy() + self.save_dict["pregrasp_pose_cam"] = torch.cat([pre_cam, pre_qcm], dim=1).unsqueeze(0).cpu().numpy() + + jp = self.robot.data.joint_pos[:, self.robot_entity_cfg.joint_ids] + self.wait(gripper_open=True, steps=4, record = self.record) + + # pre → grasp + info_all = self.build_grasp_info(gp_w.cpu().numpy(), gq_w.cpu().numpy(), pre_w.cpu().numpy(), gq_w.cpu().numpy()) + jp, success = self.move_to(info_all["pre_p_b"], info_all["pre_q_b"], gripper_open=True, record = self.record) + if torch.any(success==False): return [] + self.save_dict["actions"].append(np.concatenate([info_all["pre_p_b"].cpu().numpy(), info_all["pre_q_b"].cpu().numpy(), np.zeros((B, 1))], axis=1)) + jp = self.wait(gripper_open=True, steps=3, record = self.record) + + jp, success = self.move_to(info_all["p_b"], info_all["q_b"], gripper_open=True, record = self.record) + if torch.any(success==False): return [] + self.save_dict["actions"].append(np.concatenate([info_all["p_b"].cpu().numpy(), info_all["q_b"].cpu().numpy(), np.zeros((B, 1))], axis=1)) + + # close gripper + jp = self.wait(gripper_open=False, steps=50, record = self.record) + self.save_dict["actions"].append(np.concatenate([info_all["p_b"].cpu().numpy(), info_all["q_b"].cpu().numpy(), np.ones((B, 1))], axis=1)) + + # object goal following + # self.lift_up(height=0.05, gripper_open=False) + if self.task_type == "simple_pick_place" or self.task_type == "simple_pick": + jp, is_success = self.follow_object_centers(jp, sample_step=1, visualize=True) + elif self.task_type == "targetted_pick_place": + jp, is_success = self.follow_object_goals(jp, sample_step=1, visualize=True) + else: + raise ValueError(f"[ERR] Invalid task type: 
{self.task_type}") + #jp = self.follow_object_goals(jp, sample_step=1, visualize=True) + + is_success = is_success & self.is_success() + # Arrange the output: we want to collect only the successful env ids as a list. + is_success = torch.tensor(is_success, dtype=torch.bool, device=self.sim.device) + success_env_ids = torch.where(is_success)[0].cpu().numpy().tolist() + + print(f"[INFO] success_env_ids: {success_env_ids}") + if self.record: + self.save_data(ignore_keys=["segmask", "depth"], env_ids=success_env_ids) + + return success_env_ids + + def run_batch_trajectory(self, traj_cfg_list: List[TrajectoryCfg]): + self.traj_cfg_list = traj_cfg_list + self.compute_components() + self.compute_object_goal_traj() + + return self.inference() + + + +# ──────────────────────────── Entry Point ──────────────────────────── + + + + +def sim_randomize_rollout(keys: list[str], args_cli: argparse.Namespace): + for key in keys: + success_trajectory_config_list = [] + total_require_traj_num = 50 + task_json_path = BASE_DIR / "tasks" / key / "task.json" + task_cfg = load_task_cfg(task_json_path) + randomizer = Randomizer(task_cfg) + random_task_cfg_list = randomizer.generate_randomized_scene_cfg(grid_dist=0.0 , grid_num=3, angle_random_range=1/20 * np.pi, angle_random_num=10, traj_randomize_num=10, scene_randomize_num=1, robot_pose_randomize_range=0, robot_pose_randomize_angle=np.pi/180, robot_pose_randomize_num=10) + + args_cli.key = key + sim_cfgs = load_sim_parameters(BASE_DIR, key) + + demo_dir = BASE_DIR / "tasks" / key / "demos" + data_dir = BASE_DIR / "data" / key + current_timestep = 0 + env, _ = make_env( + cfgs=sim_cfgs, num_envs=3, + device=args_cli.device, + bg_simplify=False, + ) + sim, scene = env.sim, env.scene + bg_rgb_path = task_cfg.bg_rgb_path + + bg_rgb = imageio.imread(bg_rgb_path) + my_sim = RandomizeExecution(sim, scene, sim_cfgs=sim_cfgs, demo_dir=demo_dir, data_dir=data_dir, record=True, args_cli=args_cli, bg_rgb=bg_rgb) + my_sim.task_cfg = task_cfg + while len(success_trajectory_config_list) < total_require_traj_num: + traj_cfg_list = random_task_cfg_list[current_timestep: current_timestep + 3] + current_timestep += 3 + + success_env_ids = my_sim.run_batch_trajectory(traj_cfg_list) + + env.close() + if len(success_env_ids) > 0: + for env_id in success_env_ids: + success_trajectory_config_list.append(traj_cfg_list[env_id]) + + print(f"[INFO] success_trajectory_config_list: {len(success_trajectory_config_list)}") + print(total_require_traj_num) + + # for timestep in range(len(success_trajectory_config_list),10): + # traj_cfg_list = random_task_cfg_list[timestep: min(timestep + 10, len(random_task_cfg_list))] + # my_sim = RandomizeExecution(sim, scene, sim_cfgs=sim_cfgs, traj_cfg_list=traj_cfg_list, demo_dir=demo_dir, record=True) + # success_env_ids = my_sim.inference() + # del my_sim + # torch.cuda.empty_cache() + + + return success_trajectory_config_list + diff --git a/openreal2sim/simulation/isaaclab/sim_utils/calibration_utils.py b/openreal2sim/simulation/isaaclab/sim_utils/calibration_utils.py old mode 100644 new mode 100755 diff --git a/openreal2sim/simulation/isaaclab/sim_utils/sim_utils.py b/openreal2sim/simulation/isaaclab/sim_utils/sim_utils.py old mode 100644 new mode 100755 index 08dc60f..062a3dc --- a/openreal2sim/simulation/isaaclab/sim_utils/sim_utils.py +++ b/openreal2sim/simulation/isaaclab/sim_utils/sim_utils.py @@ -9,7 +9,7 @@ from sim_utils.calibration_utils import calibration_to_robot_pose, load_extrinsics def load_sim_parameters(basedir, key) -> dict: - 
scene_json_path = Path(basedir) / "outputs" / key / "scene" / "scene.json" + scene_json_path = Path(basedir) / "outputs" / key / "simulation" / "scene.json" scene_json = json.load(open(scene_json_path, "r")) exp_config = yaml.load(open(Path(basedir) / "config/config.yaml"), Loader=yaml.FullLoader) exp_config = exp_config["local"][key]["simulation"] @@ -44,23 +44,24 @@ def load_sim_parameters(basedir, key) -> dict: } # demo configs + goal_offset = exp_config.get("goal_offset", 0) grasp_idx = exp_config.get("grasp_idx", -1) grasp_pre = exp_config.get("grasp_pre", None) grasp_delta = exp_config.get("grasp_delta", None) - traj_key = exp_config.get("traj_key", "fdpose_trajs") # "fdpose_trajs", "simple_trajs", "hybrid_trajs" - manip_object_id = exp_config.get("manip_object_id", "1") - traj_path = scene_json["objects"][manip_object_id][traj_key] + manip_object_id = scene_json["manipulated_oid"] + traj_path = scene_json["objects"][manip_object_id]["final_trajs"] grasp_path = scene_json["objects"][manip_object_id].get("grasps", None) + final_gripper_closed = scene_json["final_gripper_closed"] demo_cfg = { "manip_object_id": manip_object_id, - "traj_key": traj_key, "traj_path": traj_path, "goal_offset": goal_offset, "grasp_idx": grasp_idx, "grasp_pre": grasp_pre, "grasp_delta": grasp_delta, "grasp_path": grasp_path, + "final_gripper_closed": final_gripper_closed, } # physics configs diff --git a/openreal2sim/simulation/isaaclab/sim_utils/transform_utils.py b/openreal2sim/simulation/isaaclab/sim_utils/transform_utils.py old mode 100644 new mode 100755 diff --git a/openreal2sim/simulation/isaaclab/task_interface.py b/openreal2sim/simulation/isaaclab/task_interface.py new file mode 100755 index 0000000..e69de29 diff --git a/openreal2sim/simulation/modules/demo_motion_process.py b/openreal2sim/simulation/modules/demo_motion_process.py new file mode 100755 index 0000000..6566aee --- /dev/null +++ b/openreal2sim/simulation/modules/demo_motion_process.py @@ -0,0 +1,392 @@ +#### + +### assign object types, determine start and end frame, downsample traj, compute stacking sequence and assign mask. 
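+### Example (illustrative, made-up numbers): downsample_traj() below keeps frame 0 and then any
+### frame whose pose differs from the last kept frame by >= trans_threshold or >= rot_threshold.
+### With trans_threshold = 0.03 m and object positions at 0, 1, 4, 5, 9 cm along one axis,
+### the kept indices are [0, 2, 4]: frame 2 is the first to move >= 3 cm from frame 0, and the
+### final frame is appended as its own keyframe because it moved a further 5 cm past frame 2.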
+### TODO: also bring in an "ending" mesh here. That's all.
+import yaml
+import numpy as np
+from pyquaternion import Quaternion
+from pathlib import Path
+import pickle
+import cv2
+import trimesh
+import sys
+import json
+base_dir = Path.cwd()
+sys.path.append(str(base_dir / "openreal2sim" / "simulation"))
+
+def recompute_traj(traj, c2w, extrinsics):
+    for i in range(len(traj)):
+        traj[i] = c2w @ np.linalg.inv(extrinsics[0]) @ extrinsics[i] @ np.linalg.inv(c2w) @ traj[i]
+    return traj
+
+def recompute_all_traj(scene_json_dict, scene_dict):
+    for obj_id, obj in scene_json_dict["objects"].items():
+        for key in ('fdpose_trajs', 'simple_trajs', 'hybrid_trajs'):
+            traj_path = obj[key]
+            traj = np.load(traj_path)
+            traj = traj.reshape(-1, 4, 4)
+            c2w = np.array(scene_json_dict["camera"]["camera_opencv_to_world"]).astype(np.float64).reshape(4,4)
+            extrinsics = np.array(scene_dict["extrinsics"]).astype(np.float64).reshape(-1,4,4)
+            traj = recompute_traj(traj, c2w, extrinsics)
+            new_traj_path = traj_path.replace(".npy", "_recomputed.npy").replace("reconstruction", "simulation").replace("motions","")
+            scene_json_dict["objects"][obj_id][f"{key}_recomputed"] = str(new_traj_path)
+            np.save(new_traj_path, traj)
+
+
+def get_mask_from_frame(scene_dict, frame_index, oid):
+    mask = scene_dict["mask"][frame_index][oid]["mask"]
+    return mask
+
+
+def get_object_bbox(mesh_in_path):
+    mesh = trimesh.load(mesh_in_path)
+    bbox = mesh.bounds
+    return bbox
+
+
+
+def lift_traj(traj, height=0.02):
+    new_traj = []
+    for i in range(len(traj)):
+        new_traj.append(traj[i])
+    for i in range(1, len(new_traj) - 1):
+        new_traj[i][2][3] += height
+    return new_traj
+
+def determine_stacking_bbox(bbox_0, bbox_1):
+    """
+    Determine if two bboxes have intersection on x and y axes.
+
+    bbox_0, bbox_1: np.ndarray, (2,3) or (2,2) or (2,?) where axis 0 is (min,max) and axis 1 is x,y,(z)
+
+    Returns: True if x and y intervals overlap (i.e. stacking is possible), False otherwise
+
+    Generally this is enough for start frame.
+    """
+    # Extract x/y intervals
+    x0_min, x0_max = bbox_0[0][0], bbox_0[1][0]
+    y0_min, y0_max = bbox_0[0][1], bbox_0[1][1]
+    x1_min, x1_max = bbox_1[0][0], bbox_1[1][0]
+    y1_min, y1_max = bbox_1[0][1], bbox_1[1][1]
+    z0_min, z0_max = bbox_0[0][2], bbox_0[1][2]
+    z1_min, z1_max = bbox_1[0][2], bbox_1[1][2]
+
+    x_overlap = not (x0_max < x1_min or x1_max < x0_min)
+    y_overlap = not (y0_max < y1_min or y1_max < y0_min)
+    if x_overlap and y_overlap:
+        return True
+
+    return False
+
+
+
+def load_obj_masks(data: dict):
+    """
+    Return object list for frame-0:
+        [{'mask': bool array, 'name': name, 'bbox': (x1,y1,x2,y2)}, ...]
+ Filter out names: 'ground' / 'hand' / 'robot' + """ + frame_objs = data.get(0, {}) # only frame 0 + objs = [] + for oid, item in frame_objs.items(): + name = item["name"] + if name in ("ground", "hand", "robot"): + continue + objs.append({ + "oid": oid, + "mask": item["mask"].astype(bool), + "name": name, + "bbox": item["bbox"] # used for cropping + }) + # Keep original behavior: sort by mask area (desc) + objs.sort(key=lambda x: int(x["oid"])) + return objs + + + +def pose_distance(T1, T2): + # Translation distance + t1 = T1[:3, 3] + t2 = T2[:3, 3] + trans_dist = np.linalg.norm(t2 - t1) + # Rotation difference (angle) + R1 = T1[:3, :3] + R2 = T2[:3, :3] + dR = R2 @ R1.T + # numerical errors can make the trace slightly out of bounds + cos_angle = (np.trace(dR) - 1) / 2 + cos_angle = np.clip(cos_angle, -1.0, 1.0) + angle = np.arccos(cos_angle) + return trans_dist, angle + +def build_mask_array( + oid: int, + scene_dict: dict[str, any] +) -> np.ndarray: + + H, W, N = scene_dict["height"], scene_dict["width"], scene_dict["n_frames"] + out = np.zeros((N, H, W), dtype=np.uint8) + for i in range(N): + m = scene_dict["mask"][i][oid]["mask"] + out[i] = m.astype(np.uint8) + return out + + + +def downsample_traj(trajectory, trans_threshold=0.05, rot_threshold=np.radians(20)): + """ + Downsample pose trajectory where each element is a 4x4 transformation matrix (SE3). + Retains keyframes with sufficient translation or rotation from the previous kept frame. + Returns: list of indices for retained frames. + """ + if len(trajectory) <= 2: + return list(range(len(trajectory))) + + + downsampled_indices = [0] + prev_idx = 0 + for i in range(1, len(trajectory) - 1): + trans_dist, angle = pose_distance(trajectory[prev_idx], trajectory[i]) + if trans_dist >= trans_threshold or angle >= rot_threshold: + downsampled_indices.append(i) + prev_idx = i + if len(downsampled_indices) > 0: + last_kept_idx = downsampled_indices[-1] + trans_dist, angle = pose_distance(trajectory[last_kept_idx], trajectory[-1]) + if trans_dist > 0.6 * trans_threshold or angle > 0.6 * rot_threshold: + downsampled_indices.append(len(trajectory) - 1) + elif trans_dist > 0.1 * trans_threshold or angle > 0.1 * rot_threshold: + downsampled_indices[-1] = len(trajectory) - 1 + + return downsampled_indices + +def refine_end_frame(hand_masks, object_masks): + N = min(len(hand_masks), len(object_masks)) + kernel = np.ones((10, 10), np.uint8) + for i in range(N-1, -1, -1): + if hand_masks[i] is None: + continue + hand_mask = hand_masks[i].astype(np.uint8) + obj_mask = object_masks[i].astype(np.uint8) + obj_mask_dilated = cv2.dilate(obj_mask, kernel, iterations=1) + overlap = (hand_mask & obj_mask_dilated).sum() + if overlap > 0: + return i + 1 + return 0 + + +def demo_motion_process(keys, key_scene_dicts, key_cfgs): + base_dir = Path.cwd() + for key in keys: + scene_dict = key_scene_dicts[key] + scene_json_dict_path = base_dir / f"outputs/{key}/simulation/scene.json" + with open(scene_json_dict_path, "r") as f: + scene_json_dict = json.load(f) + recompute_all_traj(scene_json_dict, scene_dict) + key_cfg = key_cfgs[key] + max_placement_oid = None + max_placement_distance = 0.0 + placement_distances = {} + fd_traj_dict = {} + for obj_id, obj in scene_json_dict["objects"].items(): + obj_traj = obj["simple_trajs_recomputed"] + obj_traj = np.load(obj_traj) + obj_traj = obj_traj.reshape(-1, 4, 4) + abs_distance, _ = pose_distance(obj_traj[0], obj_traj[-1]) + fd_traj = obj["fdpose_trajs_recomputed"] + fd_traj = np.load(fd_traj) + fd_traj = 
fd_traj.reshape(-1, 4, 4)
+            fd_dist, fd_angle = pose_distance(fd_traj[0], fd_traj[-1])
+            fd_traj_dict[obj_id] = {
+                "fd_distance": fd_dist,
+                "fd_angle": fd_angle
+            }
+            print(f"Object: {obj_id}, distance: {abs_distance}")
+            if abs_distance > max_placement_distance:
+                max_placement_distance = abs_distance
+                max_placement_oid = obj_id
+            scene_json_dict["objects"][obj_id]["type"] = "static"
+            placement_distances[obj_id] = abs_distance
+        scene_json_dict["manipulated_oid"] = max_placement_oid
+        name = scene_json_dict["objects"][max_placement_oid]["name"]
+        scene_json_dict["objects"][max_placement_oid]["type"] = "manipulated"
+        print(f"Manipulated object: {max_placement_oid}, distance: {max_placement_distance}")
+        manipulated_fd_trajs_path = scene_json_dict["objects"][max_placement_oid]["fdpose_trajs_recomputed"]
+        manipulated_fd_trajs = np.load(manipulated_fd_trajs_path)
+        angle_displacement = [pose_distance(manipulated_fd_trajs[i], manipulated_fd_trajs[i+1])[1] for i in range(len(manipulated_fd_trajs)-1)]
+        angle_displacement_sum = sum(angle_displacement)
+
+        fd_traj_dict.pop(max_placement_oid)
+        traj_key = 'fdpose_trajs_recomputed'
+        if len(fd_traj_dict.keys()) > 0:
+            max_fd_distance = max(x["fd_distance"] for x in fd_traj_dict.values())
+            max_fd_angle = max(x["fd_angle"] for x in fd_traj_dict.values())
+        else:
+            max_fd_distance = 0.0
+            max_fd_angle = 0.0
+        if np.degrees(angle_displacement_sum) > 180 or max_fd_angle > np.radians(20):
+            traj_key = 'simple_trajs_recomputed'
+        else:
+            if max_fd_distance > 0.05:
+                traj_key = 'hybrid_trajs_recomputed'
+        print(f"Traj key: {traj_key}")
+        scene_json_dict["traj_key"] = traj_key
+        manipulated_trajs_path = scene_json_dict["objects"][max_placement_oid][traj_key]
+        manipulated_trajs = np.load(manipulated_trajs_path)
+        object_masks = build_mask_array(int(max_placement_oid), scene_dict)
+        hand_masks = scene_dict["simulation"]["hand_masks"]
+        end_frame = refine_end_frame(hand_masks, object_masks)
+        print(f"End frame: {end_frame}")
+        trajs = manipulated_trajs
+        if end_frame == len(manipulated_trajs) - 1:
+            scene_json_dict["gripper_closed"] = True
+        elif end_frame == 0:
+            if np.abs(manipulated_trajs[0][2][3]) < 0.03:
+                scene_json_dict["gripper_closed"] = False
+            else:
+                scene_json_dict["gripper_closed"] = True
+        else:
+            scene_json_dict["gripper_closed"] = False
+        #trajs = manipulated_trajs[:end_frame]
+
+        # require_calib = False
+        # placement_distances = {}
+        # for obj_id, distance in placement_distances.items():
+        #     if obj_id == max_placement_oid:
+        #         placement_distances.pop(obj_id)
+        #         break
+        # for obj_id, distance in placement_distances.items():
+        #     if distance > 0.05:
+        #         require_calib = True
+        #         break
+        # if require_calib:
+        #     new_manip_traj = []
+        #     calib_goal_id = np.argmax(placement_distances.values())
+        #     calib_goal_oid = list(placement_distances.keys())[calib_goal_id]
+        #     calib_trajs_path = scene_dict["info"]["objects"][calib_goal_oid]["fdpose_trajs"]
+        #     calib_trajs = np.load(calib_trajs_path)
+        #     calib_ref_pose = calib_trajs[0].reshape(4, 4)  # reference pose of the calib object (ideal static state)
+        #     for (single_manip_traj, single_calib_traj) in zip(manipulated_trajs, calib_trajs):
+        #         single_manip_traj = single_manip_traj.reshape(4, 4)
+        #         single_calib_traj = single_calib_traj.reshape(4, 4)
+        #         # Apply the object's transform relative to the current calib pose onto the original calib pose (removes calib drift)
+        #         # Steps: 1) compute the object's transform relative to the current calib: inv(single_calib_traj) @ single_manip_traj
+        #         #        2) apply this relative transform to the reference calib: calib_ref_pose @ (relative transform)
+        #         # import pdb; pdb.set_trace()
+        #         new_single_traj = calib_ref_pose @ np.linalg.inv(single_calib_traj) @ single_manip_traj
+
# new_manip_traj.append(new_single_traj) + # new_manip_traj = np.array(new_manip_traj) + # np.save(manipulated_trajs_path, new_manip_traj) + # trajs = new_manip_traj[:end_frame] + # else: + # trajs = manipulated_trajs[:end_frame] + downsampled_indices = downsample_traj(trajs, trans_threshold=0.03, rot_threshold=np.radians(20)) + scene_json_dict["chosen_indices"] = downsampled_indices + trajs = trajs[downsampled_indices] + + + trajs = lift_traj(trajs, height=0.02) + print(f"Downsampled indices: {[downsampled_indices]}") + downsampled_traj_path = base_dir / f"outputs/{key}/simulation" / f"{max_placement_oid}_final_traj.npy" + + + print(f"Trajs: {trajs[-1]}") + np.save(downsampled_traj_path, trajs) + scene_json_dict["objects"][max_placement_oid]["final_trajs"] = str(downsampled_traj_path) + scene_json_dict["start_frame_idx"] = downsampled_indices[1] + + end_pose = trajs[-1] + mesh_in_path = scene_json_dict["objects"][max_placement_oid]["optimized"] + mesh = trimesh.load(mesh_in_path) + mesh.apply_transform(end_pose) + path = base_dir / f"outputs/{key}/simulation/{max_placement_oid}_{name}_end.glb" + mesh.export(path) + scene_json_dict["objects"][max_placement_oid]["end_mesh"] = str(path) + bbox_end = get_object_bbox(path) + + start_related = [] + end_related = [] + mesh_in_path = scene_json_dict["objects"][max_placement_oid]["optimized"] + bbox_start = get_object_bbox(mesh_in_path) + for oid, obj in scene_json_dict["objects"].items(): + if not isinstance(oid, int): + continue + if oid == max_placement_oid: + continue + mesh_in_path = obj["optimized"] + bbox = get_object_bbox(mesh_in_path) + if determine_stacking_bbox(bbox_start, bbox): + start_related.append(oid) + if determine_stacking_bbox(bbox_end, bbox): + end_related.append(oid) + + print(f"Start related: {start_related}") + print(f"End related: {end_related}") + + scene_json_dict["start_related"] = list(start_related) + scene_json_dict["end_related"] = list(end_related) + + scene_path = scene_json_dict["scene_mesh"]["optimized"] + scene_mesh = trimesh.load(scene_path) + scene_mesh = scene_mesh + mesh + path = base_dir / f"outputs/{key}/simulation/scene_end.glb" + scene_mesh.export(path) + + + + if len(end_related) > 0: + target_oid = end_related[0] + scene_json_dict["target_oid"] = target_oid + scene_json_dict["task_type"] = "targetted_pick_place" + if len(scene_json_dict["task_desc"]) > 0: + task_desc = "Pick up the " + name + " and place it on the " + target_oid + "." + if task_desc not in scene_json_dict["task_desc"]: + scene_json_dict["task_desc"].append(task_desc) + else: + task_desc = "Pick up the " + name + " and place it on the " + target_oid + "." + if task_desc not in scene_json_dict["task_desc"]: + scene_json_dict["task_desc"] = [task_desc] + else: + if bbox_end[0][2] < 0.03: + scene_json_dict["task_type"] = "simple_pick_place" + task_desc = "Pick up the " + name + " and place it on the ground." + if len(scene_json_dict.get("task_desc", [])) == 0: + scene_json_dict["task_desc"] = [task_desc] + if task_desc not in scene_json_dict["task_desc"]: + scene_json_dict["task_desc"].append(task_desc) + else: + scene_json_dict["task_type"] = "simple_pick" + task_desc = "Pick up the " + name + "." 
+ if len(scene_json_dict.get("task_desc", [])) == 0: + scene_json_dict["task_desc"] = [task_desc] + if task_desc not in scene_json_dict["task_desc"]: + scene_json_dict["task_desc"].append(task_desc) + + print(f"Task description: {scene_json_dict['task_desc']}") + print(f"Task type: {scene_json_dict['task_type']}") + + with open(base_dir / f"outputs/{key}/simulation/scene.json", "w") as f: + json.dump(scene_json_dict, f, indent=2) + with open(base_dir / f"outputs/{key}/scene/scene.pkl", "wb") as f: + pickle.dump(scene_dict, f) + return key_scene_dicts + + +if __name__ == "__main__": + base_dir = Path.cwd() + cfg_path = base_dir / "config" / "config.yaml" + cfg = yaml.safe_load(cfg_path.open("r")) + keys = cfg["keys"] + from utils.compose_config import compose_configs + key_cfgs = {key: compose_configs(key, cfg) for key in keys} + print(f"Key cfgs: {key_cfgs}") + key_scene_dicts = {} + for key in keys: + scene_pkl = base_dir / f'outputs/{key}/scene/scene.pkl' + with open(scene_pkl, 'rb') as f: + scene_dict = pickle.load(f) + key_scene_dicts[key] = scene_dict + demo_motion_process(keys, key_scene_dicts, key_cfgs) + + + diff --git a/openreal2sim/simulation/modules/envs/randomzier.py b/openreal2sim/simulation/modules/envs/randomzier.py new file mode 100755 index 0000000..fde28fa --- /dev/null +++ b/openreal2sim/simulation/modules/envs/randomzier.py @@ -0,0 +1,348 @@ +import numpy as np +import random +from scipy.spatial.transform import Rotation as R +from scipy.spatial.transform import Slerp +from .task_cfg import TaskCfg, TaskType, SuccessMetric, SuccessMetricType, TrajectoryCfg + +import torch +import transforms3d + +def pose_to_mat(pos, quat): + if torch.is_tensor(pos): pos = pos.cpu().numpy() + if torch.is_tensor(quat): quat = quat.cpu().numpy() + m = np.eye(4, dtype=np.float32) + m[:3, :3] = transforms3d.quaternions.quat2mat(quat) + m[:3, 3] = pos + return m + +def mat_to_pose(mat): + pos = mat[:3, 3] + quat = transforms3d.quaternions.mat2quat(mat[:3, :3]) + return pos, quat + + + +class Randomizer(TaskCfg): + def __init__(self, task_cfg: TaskCfg): + self.task_cfg = task_cfg + + + def generate_randomized_scene_cfg(self, grid_dist: float, grid_num: int, angle_random_range: float, angle_random_num: int, traj_randomize_num:int, scene_randomize_num: int, robot_pose_randomize_range, robot_pose_randomize_angle, robot_pose_randomize_num): + # Step 1: Generate candidate transforms + + candidate_transforms = [] + for i in range(-grid_num, grid_num): + for j in range(-grid_num, grid_num): + random_angles = np.random.uniform(-angle_random_range, angle_random_range, angle_random_num) + for angle in random_angles: + orig = np.eye(4) + orig[0, 3] = i * grid_dist + orig[1, 3] = j * grid_dist + orig[0, 0] = np.cos(angle) + orig[0, 1] = -np.sin(angle) + orig[1, 0] = np.sin(angle) + orig[1, 1] = np.cos(angle) + candidate_transforms.append(orig) + + # Step 2: Generate traj_randomize_num combinations of (start_pose, end_pose) + traj_pose_pairs = [] + for _ in range(traj_randomize_num): + start_pose = random.choice(candidate_transforms) + if self.task_cfg.task_type == TaskType.SIMPLE_PICK: + end_pose = start_pose.copy() # For SIMPLE_PICK, end same as start + else: + end_pose = random.choice(candidate_transforms) + traj_pose_pairs.append((start_pose, end_pose)) + + # Step 3: Generate scene_randomize_num combinations for other objects + other_object_ids = [obj.object_id for obj in self.task_cfg.objects + if obj.object_id != self.task_cfg.manipulated_oid and obj.object_id not in self.task_cfg.start_related and 
obj.object_id not in self.task_cfg.end_related] + + scene_poses_combinations = [] + for _ in range(scene_randomize_num): + # Create list of (oid, pose) pairs + other_object_poses = [(oid, random.choice(candidate_transforms)) + for oid in other_object_ids] + scene_poses_combinations.append(other_object_poses) + + # Step 4: Generate robot_pose_randomize_num random robot poses + # robot_pose format: [x, y, z, w, x, y, z] (position + quaternion wxyz) + ref_traj = self.task_cfg.reference_trajectory + assert ref_traj is not None, "Reference trajectory is not found" + ref_robot_pose = np.array(ref_traj.robot_pose) + robot_pose_mat = pose_to_mat(ref_robot_pose[:3], ref_robot_pose[3:7]) + + robot_poses = [] + for _ in range(robot_pose_randomize_num): + # Random translation within range + random_trans = np.random.uniform( + -robot_pose_randomize_range, + robot_pose_randomize_range, + 3 + ) + random_rot = np.random.uniform( + -robot_pose_randomize_angle, + robot_pose_randomize_angle, + ) + + rotate_matrix = np.eye(4) + rotate_matrix[:3, :3] = R.from_euler('z', random_rot).as_matrix() + new_robot_pose = robot_pose_mat @ rotate_matrix + new_robot_pose[:3, 3] += random_trans + # Combine position and quaternion [x, y, z, w, x, y, z] + pos, quat = mat_to_pose(new_robot_pose) + robot_pose_7d = np.concatenate([pos, quat]) + robot_poses.append(robot_pose_7d.tolist()) + + # Step 5: Generate trajectories for all combinations + generated_trajectories = [] + ref_traj = self.task_cfg.reference_trajectory + for start_pose, end_pose in traj_pose_pairs: + for other_object_poses in scene_poses_combinations: + for robot_pose in robot_poses: + # Generate trajectory for manipulated object + if self.task_cfg.task_type == TaskType.SIMPLE_PICK: + new_traj_mats = [] + for traj_pose_7d in ref_traj.object_trajectory: + pose_mat = pose_to_mat(np.array(traj_pose_7d[:3]), np.array(traj_pose_7d[3:7])) + new_traj_mats.append(start_pose @ pose_mat) + # Convert back to 7D format + new_traj_7d = [] + for mat in new_traj_mats: + pos, quat = mat_to_pose(mat) + new_traj_7d.append(np.concatenate([pos, quat]).tolist()) + + # Transform pregrasp and grasp poses + if ref_traj.pregrasp_pose: + pregrasp_mat = start_pose @ pose_to_mat(np.array(ref_traj.pregrasp_pose[:3]), np.array(ref_traj.pregrasp_pose[3:7])) + pos, quat = mat_to_pose(pregrasp_mat) + pregrasp_pose = np.concatenate([pos, quat]).tolist() + else: + pregrasp_pose = None + + if ref_traj.grasp_pose: + grasp_mat = start_pose @ pose_to_mat(np.array(ref_traj.grasp_pose[:3]), np.array(ref_traj.grasp_pose[3:7])) + pos, quat = mat_to_pose(grasp_mat) + grasp_pose = np.concatenate([pos, quat]).tolist() + else: + grasp_pose = None + else: + # Convert reference trajectory to mat format + ref_traj_mats = [] + for traj_pose_7d in ref_traj.object_trajectory: + ref_traj_mats.append(pose_to_mat(np.array(traj_pose_7d[:3]), np.array(traj_pose_7d[3:7]))) + ref_traj_mats = np.array(ref_traj_mats) + + new_traj_mats = self.compute_new_traj(start_pose, end_pose, ref_traj_mats) + + # Convert back to 7D format + new_traj_7d = [] + for mat in new_traj_mats: + pos, quat = mat_to_pose(mat) + new_traj_7d.append(np.concatenate([pos, quat]).tolist()) + #new_traj_7d = lift_traj(new_traj_7d) + # Transform pregrasp and grasp poses + if ref_traj.pregrasp_pose: + pregrasp_mat = start_pose @ pose_to_mat(np.array(ref_traj.pregrasp_pose[:3]), np.array(ref_traj.pregrasp_pose[3:7])) + pos, quat = mat_to_pose(pregrasp_mat) + pregrasp_pose = np.concatenate([pos, quat]).tolist() + else: + pregrasp_pose = None + + if 
ref_traj.grasp_pose: + grasp_mat = start_pose @ pose_to_mat(np.array(ref_traj.grasp_pose[:3]), np.array(ref_traj.grasp_pose[3:7])) + pos, quat = mat_to_pose(grasp_mat) + grasp_pose = np.concatenate([pos, quat]).tolist() + else: + grasp_pose = None + + # Apply scene randomization to other objects (if needed, update object_poses here) + # other_object_poses is now a list of (oid, pose) pairs + + # Create success metric + + ref_end_pose_mat = pose_to_mat( + np.array(ref_traj.success_metric.end_pose[:3]), + np.array(ref_traj.success_metric.end_pose[3:7]) + ) + end_pose_metric_mat = end_pose @ ref_end_pose_mat + pos, quat = mat_to_pose(end_pose_metric_mat) + end_pose_metric_7d = np.concatenate([pos, quat]).tolist() + + success_metric_type = ref_traj.success_metric.success_metric_type + if success_metric_type == SuccessMetricType.SIMPLE_LIFT: + success_metric = SuccessMetric( + success_metric_type=SuccessMetricType.SIMPLE_LIFT, + lift_height=ref_traj.success_metric.lift_height, + final_gripper_close=ref_traj.success_metric.final_gripper_close, + end_pose=end_pose_metric_7d + ) + elif success_metric_type == SuccessMetricType.TARGET_POINT: + # Convert end_pose from 7D to mat, transform, then back to 7D + success_metric = SuccessMetric( + success_metric_type=SuccessMetricType.TARGET_POINT, + final_gripper_close=ref_traj.success_metric.final_gripper_close, + end_pose=end_pose_metric_7d + ) + elif success_metric_type == SuccessMetricType.TARGET_PLANE: + success_metric = SuccessMetric( + success_metric_type=SuccessMetricType.TARGET_PLANE, + ground_value=ref_traj.success_metric.ground_value, + final_gripper_close=ref_traj.success_metric.final_gripper_close, + end_pose=end_pose_metric_7d + ) + + # Store object poses in 7D format + object_poses = {} + for oid, pose_mat in other_object_poses: + pos, quat = mat_to_pose(pose_mat) + object_poses[oid] = np.concatenate([pos, quat]).tolist() + + # Add start and end related objects + start_pos, start_quat = mat_to_pose(start_pose) + start_pose_7d = np.concatenate([start_pos, start_quat]).tolist() + for oid in self.task_cfg.start_related: + object_poses[oid] = start_pose_7d + + end_pos, end_quat = mat_to_pose(end_pose) + end_pose_7d = np.concatenate([end_pos, end_quat]).tolist() + for oid in self.task_cfg.end_related: + object_poses[oid] = end_pose_7d + + object_poses[self.task_cfg.manipulated_oid] = start_pose_7d + + robot_type = ref_traj.robot_type + generated_trajectories.append(TrajectoryCfg( + robot_pose=robot_pose, + object_poses=object_poses, + object_trajectory=new_traj_7d, + final_gripper_close=ref_traj.final_gripper_close, + pregrasp_pose=pregrasp_pose, + grasp_pose=grasp_pose, + success_metric=success_metric, + robot_type=robot_type + )) + + # Total trajectories = traj_randomize_num * scene_randomize_num * robot_pose_randomize_num + assert len(generated_trajectories) == traj_randomize_num * scene_randomize_num * robot_pose_randomize_num, \ + f"Expected {traj_randomize_num * scene_randomize_num * robot_pose_randomize_num} trajectories, got {len(generated_trajectories)}" + + random.shuffle(generated_trajectories) + return generated_trajectories + + @staticmethod + def compute_new_traj(start_trans: np.ndarray, end_trans: np.ndarray, reference_traj: np.ndarray, + interp_mode: str = 'slerp') -> np.ndarray: + """ + Compute interpolated trajectory using Real2Render2Real's relative shape preservation method. 
+ + This maintains the relative motion pattern of the reference trajectory by normalizing + the progress along the reference and mapping it to the new start and end positions. + + Args: + start_trans: 4x4 transformation matrix from original start to new start + end_trans: 4x4 transformation matrix from original end to new end + reference_traj: Reference trajectory in SE(3), shape (N, 4, 4) or (N, 16) + interp_mode: 'linear' or 'slerp' for rotation interpolation (default: 'slerp') + + Returns: + New interpolated trajectory with same shape as reference_traj + """ + # Ensure reference_traj is in (N, 4, 4) format + if reference_traj.ndim == 2 and reference_traj.shape[1] == 16: + reference_traj = reference_traj.reshape(-1, 4, 4) + elif reference_traj.ndim == 3 and reference_traj.shape[1] == 4 and reference_traj.shape[2] == 4: + reference_traj = reference_traj.copy() + else: + raise ValueError(f"Invalid reference_traj shape: {reference_traj.shape}, expected (N, 4, 4) or (N, 16)") + + N = len(reference_traj) + if N == 0: + return reference_traj.copy() + + # Get start and end poses from reference trajectory + ref_start = reference_traj[0].copy() + ref_end = reference_traj[-1].copy() + + # Compute new start and end poses + new_start_mat = start_trans @ ref_start + new_end_mat = end_trans @ ref_end + + # Convert to 7D format using mat_to_pose + ref_traj_7d = [] + for pose_mat in reference_traj: + pos, quat = mat_to_pose(pose_mat) + ref_traj_7d.append(np.concatenate([pos, quat])) + ref_traj_7d = np.array(ref_traj_7d) + + pos_start, quat_start = mat_to_pose(new_start_mat) + pos_end, quat_end = mat_to_pose(new_end_mat) + + # Initialize new trajectory + new_traj_7d = np.zeros_like(ref_traj_7d) + + # Normalize time steps + t = np.linspace(0, 1, N) + + # Split into position and rotation components + pos_orig = ref_traj_7d[:, :3] + quat_orig = ref_traj_7d[:, 3:7] # wxyz format (from transforms3d) + + ref_start_pos, ref_start_quat = mat_to_pose(ref_start) + ref_end_pos, ref_end_quat = mat_to_pose(ref_end) + + # Interpolate positions: preserve relative shape from reference trajectory + for axis in range(3): + ref_range = ref_end_pos[axis] - ref_start_pos[axis] + if np.abs(ref_range) > 1e-10: + # Normalize progress along reference trajectory + normalized_progress = (pos_orig[:, axis] - ref_start_pos[axis]) / ref_range + # Map to new range + new_traj_7d[:, axis] = pos_start[axis] + (pos_end[axis] - pos_start[axis]) * normalized_progress + else: + # If no change in reference, use direct transformation + new_traj_7d[:, axis] = pos_orig[:, axis] + (pos_start[axis] - ref_start_pos[axis]) + + # Interpolate rotations using SLERP + if interp_mode == 'slerp': + # Use scipy Slerp for spherical linear interpolation + # Convert wxyz to xyzw for scipy + quat_start_xyzw = np.array([quat_start[1], quat_start[2], quat_start[3], quat_start[0]]) + quat_end_xyzw = np.array([quat_end[1], quat_end[2], quat_end[3], quat_end[0]]) + + # Create Slerp interpolator + key_rots = R.from_quat([quat_start_xyzw, quat_end_xyzw]) + key_times = [0, 1] + slerp = Slerp(key_times, key_rots) + + # Interpolate for all time steps + interp_rots = slerp(t) + quat_interp_xyzw = interp_rots.as_quat() + + # Convert back to wxyz format + new_traj_7d[:, 3] = quat_interp_xyzw[:, 3] # w + new_traj_7d[:, 4] = quat_interp_xyzw[:, 0] # x + new_traj_7d[:, 5] = quat_interp_xyzw[:, 1] # y + new_traj_7d[:, 6] = quat_interp_xyzw[:, 2] # z + else: # linear + # Linear interpolation (needs normalization) + for i in range(N): + new_traj_7d[i, 3:7] = (1 - t[i]) * 
quat_start + t[i] * quat_end + new_traj_7d[i, 3:7] /= np.linalg.norm(new_traj_7d[i, 3:7]) + + # Convert back to SE(3) matrices using pose_to_mat + new_traj = [] + for pose_7d in new_traj_7d: + pose_mat = pose_to_mat(pose_7d[:3], pose_7d[3:7]) + new_traj.append(pose_mat) + new_traj = np.array(new_traj) + + return new_traj + + + def select_randomized_cfg(self, train_set_size: int): + return random.choice(self.task_cfg.generated_trajectories[train_set_size:]) + + + + diff --git a/openreal2sim/simulation/modules/envs/task_cfg.py b/openreal2sim/simulation/modules/envs/task_cfg.py new file mode 100755 index 0000000..74dad25 --- /dev/null +++ b/openreal2sim/simulation/modules/envs/task_cfg.py @@ -0,0 +1,86 @@ + +from enum import Enum, auto +from typing import List, Dict, Optional +from dataclasses import dataclass, field + +class SuccessMetricType(Enum): + SIMPLE_LIFT = auto() + TARGET_POINT = auto() + TARGET_PLANE = auto() + +class TaskType(Enum): + SIMPLE_PICK = auto() + SIMPLE_PICK_PLACE = auto() + TARGETTED_PICK_PLACE = auto() + +class RobotType(Enum): + FRANKA = auto() + UR5 = auto() + + +@dataclass +class ObjectCfg: + object_id: int + object_name: str + mesh_path: str + usd_path: str + + +@dataclass +class SuccessMetric: + success_metric_type: SuccessMetricType + final_gripper_close:bool + lift_height: Optional[float] = None + ground_value: Optional[float] = None + end_pose: Optional[List[float]] = None + + +@dataclass +class TrajectoryCfg: + robot_pose: List[float] # quat wxyz + object_poses: Dict[int, List[float]] # quat + object_trajectory: List[List[float]] # quat + final_gripper_close: bool + success_metric: SuccessMetric + pregrasp_pose: Optional[List[float]] = None ### This is eef in world frame. quat + grasp_pose: Optional[List[float]] = None ### This is eef in world frame. quat + robot_type: Optional[RobotType] = None + + + +@dataclass +class CameraInfo: + width: float + height: float + fx: float + fy: float + cx: float + cy: float + camera_opencv_to_world: List[List[float]] + camera_position: List[float] + camera_heading_wxyz: List[float] + +@dataclass +class TaskCfg: + task_key: str + task_id: int + task_desc: List[str] + task_type: TaskType + bg_rgb_path: str + background_path: str + background_usd_path: str + camera_info: CameraInfo + manipulated_oid: int + start_related: List[int] + end_related: List[int] + objects: List[ObjectCfg] + reference_trajectory: Optional[TrajectoryCfg] = None + generated_trajectories: List[TrajectoryCfg] = field(default_factory=list) + + + + + + + + diff --git a/openreal2sim/simulation/modules/envs/task_construct.py b/openreal2sim/simulation/modules/envs/task_construct.py new file mode 100644 index 0000000..cbca7cc --- /dev/null +++ b/openreal2sim/simulation/modules/envs/task_construct.py @@ -0,0 +1,227 @@ +### construct the task config from scene dict. 
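+### Illustrative example (field values made up): the dataclasses in task_cfg.py keep every pose
+### in 7-D [x, y, z, qw, qx, qy, qz] form, e.g.
+###   metric = SuccessMetric(success_metric_type=SuccessMetricType.TARGET_POINT,
+###                          final_gripper_close=False, end_pose=[0.4, 0.0, 0.02, 1, 0, 0, 0])
+###   traj = TrajectoryCfg(robot_pose=[0, 0, 0, 1, 0, 0, 0],
+###                        object_poses={1: [0.4, 0.1, 0.02, 1, 0, 0, 0]},
+###                        object_trajectory=[[0.4, 0.1, 0.02, 1, 0, 0, 0]],
+###                        final_gripper_close=False, success_metric=metric)
+### serialize_task_cfg() / load_task_cfg() below round-trip such configs through task.json.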
+ +import json +import numpy as np +from enum import Enum +import shutil +from pathlib import Path +import os +import sys +file_path = Path(__file__).resolve() +sys.path.append(str(file_path.parent)) +from .task_cfg import TaskCfg, TaskType, ObjectCfg, CameraInfo, TrajectoryCfg, SuccessMetric, SuccessMetricType, RobotType + +def get_next_id(folder: Path) -> int: + if not folder.exists(): + os.makedirs(folder, exist_ok=True) + return 0 + subfolders = [f for f in folder.iterdir() if f.is_dir()] + task_num = len(subfolders) + return task_num + + +def construct_task_config(key, scene_dict: dict, base_folder: Path): + task_key = key + task_id = get_next_id(base_folder) + task_desc = scene_dict["task_desc"] + base_folder = base_folder / key + if base_folder.exists(): + shutil.rmtree(base_folder) + base_folder.mkdir(parents=True, exist_ok=True) # Create directory before copying files + background_path = scene_dict["background"]["registered"] + background_usd_path = scene_dict["background"]["usd"] + shutil.copy(background_path, base_folder / "background.glb") + shutil.copy(background_usd_path, base_folder / "background.usd") + background_path = base_folder / "background.glb" + background_usd_path = base_folder / "background.usd" + + bg_rgb_path = scene_dict["bg_rgb_path"] + shutil.copy(bg_rgb_path, base_folder / "bg_rgb.jpg") + bg_rgb_path = base_folder / "bg_rgb.jpg" + width = scene_dict["camera"]["width"] + height = scene_dict["camera"]["height"] + fx = scene_dict["camera"]["fx"] + fy = scene_dict["camera"]["fy"] + cx = scene_dict["camera"]["cx"] + cy = scene_dict["camera"]["cy"] + camera_opencv_to_world = scene_dict["camera"]["camera_opencv_to_world"] + camera_position = scene_dict["camera"]["camera_position"] + camera_heading_wxyz = scene_dict["camera"]["camera_heading_wxyz"] + camera_info = CameraInfo(width, height, fx, fy, cx, cy, camera_opencv_to_world, camera_position, camera_heading_wxyz) + + objects = [] + for oid, obj in scene_dict["objects"].items(): + object_id = oid + object_name = obj["name"] + mesh_path = obj["optimized"] + shutil.copy(mesh_path, base_folder / f"object_{object_id}.glb") + mesh_path = base_folder / f"object_{object_id}.glb" + usd_path = obj['usd'] + + shutil.copy(usd_path, base_folder / Path(usd_path).name) + + cfg_path = Path(usd_path).parent / "config.yaml" + asset_hash_path = Path(usd_path).parent / ".asset_hash" + usd_path = base_folder / Path(usd_path).name + shutil.copy(cfg_path, base_folder / "config.yaml") + shutil.copy(asset_hash_path, base_folder / ".asset_hash") + object_cfg = ObjectCfg(object_id, object_name, str(mesh_path), str(usd_path)) + objects.append(object_cfg) + + + manipulated_oid = scene_dict["manipulated_oid"] + start_related = scene_dict["start_related"] + end_related = scene_dict["end_related"] + + if scene_dict["task_type"] == "targetted_pick_place": + task_type = TaskType.TARGETTED_PICK_PLACE + elif scene_dict["task_type"] == "simple_pick_place": + task_type = TaskType.SIMPLE_PICK_PLACE + elif scene_dict["task_type"] == "simple_pick": + task_type = TaskType.SIMPLE_PICK + else: + raise ValueError(f"Invalid task type: {scene_dict['info']['task_type']}") + task_config = TaskCfg(task_key, task_id, task_desc, task_type, str(bg_rgb_path), str(background_path), str(background_usd_path), camera_info, manipulated_oid, start_related, end_related, objects) + json_path = base_folder / "task.json" + with open(json_path, "w") as f: + json.dump(serialize_task_cfg(task_config), f) + return task_config, base_folder + + +def 
add_reference_trajectory(task_cfg: TaskCfg, reference_trajectory: TrajectoryCfg, base_folder: Path): + task_cfg.reference_trajectory = reference_trajectory + json_path = base_folder / "task.json" + with open(json_path, "w") as f: + json.dump(serialize_task_cfg(task_cfg), f) + return task_cfg + + + + +def serialize_task_cfg(task_cfg): + """ + Serialize TaskCfg and all nested fields (including numpy arrays) into pure Python dict/list/primitive types, + so it can be safely saved as JSON. + """ + + def serialize(obj): + # Handle None + if obj is None: + return None + # Handle basic types + elif isinstance(obj, (int, float, str, bool)): + return obj + # Handle numpy array + elif isinstance(obj, np.ndarray): + return obj.tolist() + # Handle enum + elif hasattr(obj, 'name') and isinstance(obj, (Enum,)): + return obj.name + # Handle dict + elif isinstance(obj, dict): + return {serialize(k): serialize(v) for k, v in obj.items()} + # Handle list/tuple + elif isinstance(obj, (list, tuple)): + return [serialize(i) for i in obj] + # Handle dataclass/object with __dict__ + elif hasattr(obj, '__dict__'): + data = {} + for key, value in obj.__dict__.items(): + data[key] = serialize(value) + return data + # Handle class with __slots__ + elif hasattr(obj, '__slots__'): + data = {} + for slot in obj.__slots__: + data[slot] = serialize(getattr(obj, slot)) + return data + # Fallback (e.g. Path objects) + elif hasattr(obj, '__str__'): + return str(obj) + else: + raise TypeError(f"Cannot serialize object of type {type(obj)}: {repr(obj)}") + + return serialize(task_cfg) + + + + + +def load_task_cfg(json_path: Path) -> TaskCfg: + """ + Load a TaskCfg from the given JSON file path and construct a TaskCfg instance. + """ + with open(json_path, "r") as f: + cfg_dict = json.load(f) + + # Handle all fields and reconstruct proper datatypes + # Helper to reconstruct enums + def parse_enum(enum_cls, val): + if isinstance(val, enum_cls): + return val + elif isinstance(val, str): + return enum_cls[val] + else: + raise ValueError(f"Unknown value {val} for enum {enum_cls}") + + # Parse SuccessMetric(s) + def parse_success_metric(metric_dict): + return SuccessMetric( + success_metric_type=parse_enum(SuccessMetricType, metric_dict["success_metric_type"]), + final_gripper_close=metric_dict["final_gripper_close"], + lift_height=metric_dict.get("lift_height", None), + ground_value=metric_dict.get("ground_value", None), + end_pose=metric_dict.get("end_pose", None) + ) + + # Parse TrajectoryCfg(s) + def parse_traj_cfg(traj_dict): + return TrajectoryCfg( + robot_pose=np.array(traj_dict["robot_pose"], dtype=np.float32).tolist(), + object_poses={oid: np.array(pose, dtype=np.float32).tolist() for oid, pose in traj_dict["object_poses"].items()}, + object_trajectory=[np.array(m, dtype=np.float32).tolist() for m in traj_dict["object_trajectory"]], + final_gripper_close=traj_dict.get("final_gripper_close", None), + success_metric=parse_success_metric(traj_dict.get("success_metric", None)), + pregrasp_pose=traj_dict.get("pregrasp_pose", None), + grasp_pose=traj_dict.get("grasp_pose", None), + robot_type=parse_enum(RobotType, traj_dict.get("robot_type", None)) + ) + + def parse_camera_info(camera_dict): + return CameraInfo( + width=camera_dict["width"], + height=camera_dict["height"], + fx=camera_dict["fx"], + fy=camera_dict["fy"], + cx=camera_dict["cx"], + cy=camera_dict["cy"], + camera_opencv_to_world=np.array(camera_dict["camera_opencv_to_world"], dtype=np.float32).tolist(), + camera_position=np.array(camera_dict["camera_position"], 
dtype=np.float32).tolist(), + camera_heading_wxyz=np.array(camera_dict["camera_heading_wxyz"], dtype=np.float32).tolist(), + ) + def parse_object_cfg(object_dict): + return ObjectCfg( + object_id=object_dict["object_id"], + object_name=object_dict["object_name"], + mesh_path=object_dict["mesh_path"], + usd_path=object_dict["usd_path"] + ) + # Compose TaskCfg + task_cfg = TaskCfg( + task_id=cfg_dict["task_id"], + task_desc=cfg_dict["task_desc"], + task_key=cfg_dict["task_key"], + task_type=parse_enum(TaskType, cfg_dict["task_type"]), + bg_rgb_path=cfg_dict["bg_rgb_path"], + background_path=cfg_dict["background_path"], + background_usd_path=cfg_dict["background_usd_path"], + camera_info=parse_camera_info(cfg_dict["camera_info"]), + manipulated_oid=cfg_dict["manipulated_oid"], + start_related=cfg_dict["start_related"], + end_related=cfg_dict["end_related"], + objects=[parse_object_cfg(obj) for obj in cfg_dict["objects"]], + reference_trajectory=parse_traj_cfg(cfg_dict.get("reference_trajectory", None)), + generated_trajectories=[parse_traj_cfg(traj) for traj in cfg_dict.get("generated_trajectories", [])] + ) + + return task_cfg \ No newline at end of file diff --git a/openreal2sim/simulation/isaaclab/sim_preprocess/grasp_generation.py b/openreal2sim/simulation/modules/grasp_preprocess/grasp_generation.py old mode 100644 new mode 100755 similarity index 94% rename from openreal2sim/simulation/isaaclab/sim_preprocess/grasp_generation.py rename to openreal2sim/simulation/modules/grasp_preprocess/grasp_generation.py index 9d20fae..895f4ef --- a/openreal2sim/simulation/isaaclab/sim_preprocess/grasp_generation.py +++ b/openreal2sim/simulation/modules/grasp_preprocess/grasp_generation.py @@ -153,7 +153,7 @@ def run_for_key(key: str, vis_pts_per_gripper: int): base_dir = Path.cwd() out_dir = base_dir / "outputs" - scene_json = out_dir / key / "scene" / "scene.json" + scene_json = out_dir / key / "simulation" / "scene.json" if not scene_json.exists(): raise FileNotFoundError(scene_json) @@ -191,6 +191,22 @@ def run_for_key(key: str, json.dump(scene_dict, f, indent=2) print(f"[OK][{key}] scene.json updated with 'grasps' for {obj['oid']}_{obj['name']}.") + +def grasp_generation(keys): + for key in keys: + print(f"\n========== [GraspDet] Processing key: {key} ==========") + run_for_key( + key=key, + n_points=args.n_points, + keep=args.keep, + nms=args.nms, + overwrite=args.overwrite, + vis_pts_per_gripper=args.vis_pts_per_gripper, + ) + + print('[Info] Grasp generation completed.') + + def main(): parser = argparse.ArgumentParser("Export grasp proposals for ALL objects (batch over keys) and write paths into scene.json") parser.add_argument("--n_points", type=int, default=100000) diff --git a/openreal2sim/simulation/isaaclab/sim_preprocess/grasp_utils.py b/openreal2sim/simulation/modules/grasp_preprocess/grasp_utils.py old mode 100644 new mode 100755 similarity index 100% rename from openreal2sim/simulation/isaaclab/sim_preprocess/grasp_utils.py rename to openreal2sim/simulation/modules/grasp_preprocess/grasp_utils.py diff --git a/openreal2sim/simulation/modules/hand_preprocess/affordance_extraction.py b/openreal2sim/simulation/modules/hand_preprocess/affordance_extraction.py new file mode 100755 index 0000000..e69de29 diff --git a/openreal2sim/simulation/modules/hand_preprocess/grasp_point_extraction.py b/openreal2sim/simulation/modules/hand_preprocess/grasp_point_extraction.py new file mode 100755 index 0000000..942088e --- /dev/null +++ 
b/openreal2sim/simulation/modules/hand_preprocess/grasp_point_extraction.py @@ -0,0 +1,331 @@ +### This part is used to generate the affordance of the object. +### Include: +### 1. Grasp point generation. +### 2. Affordance map generation. Mainly for articulated object. This might be further combined with a PartSLIP network. +### TODO: Human annotation. +### The logic we use for grasp point generation here is: we select the frist frame of object-hand contact, compute the contact point, overlay the object and extract the 3D coordinate of the point. +### Be careful that we transfer this point to the first frame using pose matrices and extrinsics. +### TODO: This might be helpful to object selection in the first step. +import pickle +import pathlib +from pathlib import Path +import logging +import yaml +import os +import sys +import torch +import numpy as np +import cv2 +import imageio +import pytorch3d +from pytorch3d.structures import Meshes +from pytorch3d.renderer import ( + look_at_view_transform, + PerspectiveCameras, + RasterizationSettings, + MeshRenderer, + MeshRasterizer, + SoftPhongShader, + PointLights, + TexturesVertex, + BlendParams, +) +import trimesh + + +def render_pytorch3d_rgbd(mesh, K, img_size, device="cuda:0"): + """ + Given a trimesh mesh (already in camera coordinate system), render using pytorch3d. + Args: + mesh: trimesh.Trimesh, should be in camera coordinate system. + K: (3,3) camera intrinsic matrix, fx, fy, cx, cy. + img_size: (H, W), output image (and depth) size. + device: device string + Returns: + mask_img: (H,W) uint8 binary mask (1 for foreground, 0 for background) + depth_img: (H,W) float32 (Z buffer, 0 for background) + """ + if not isinstance(mesh, trimesh.Trimesh): + if hasattr(mesh, 'geometry') and len(mesh.geometry) > 0: + mesh = list(mesh.geometry.values())[0] + else: + raise ValueError('mesh is not a valid trimesh.Trimesh!') + device = torch.device(device if torch.cuda.is_available() else "cpu") + + verts = torch.tensor(np.asarray(mesh.vertices), dtype=torch.float32, device=device) + faces = torch.tensor(np.asarray(mesh.faces), dtype=torch.int64, device=device) + verts_rgb = torch.ones_like(verts)[None] * torch.tensor([[0.7, 0.7, 0.7]], dtype=torch.float32, device=device) + from pytorch3d.renderer import TexturesVertex + textures = TexturesVertex(verts_features=verts_rgb) + + mesh_p3d = Meshes(verts=[verts], faces=[faces], textures=textures) + + # Get camera intrinsics + fx = float(K[0, 0]) + fy = float(K[1, 1]) + cx = float(K[0, 2]) + cy = float(K[1, 2]) + if isinstance(img_size, int): + image_size = [[img_size, img_size]] + H, W = img_size, img_size + else: + H, W = img_size + image_size = [[H, W]] + + # Use pytorch3d's PerspectiveCameras for custom intrinsics + # Note: R and T are camera-to-world transforms (identity for camera coordinate system) + cameras = PerspectiveCameras( + device=device, + focal_length=torch.tensor([[fx, fy]], dtype=torch.float32, device=device), + principal_point=torch.tensor([[cx, cy]], dtype=torch.float32, device=device), + image_size=torch.tensor(image_size, dtype=torch.float32, device=device), + in_ndc=False, + R=torch.eye(3, dtype=torch.float32, device=device).unsqueeze(0), + T=torch.zeros(1, 3, dtype=torch.float32, device=device), + ) + + blend_params = BlendParams(sigma=1e-4, gamma=1e-4) + raster_settings = RasterizationSettings( + image_size=(H, W), + blur_radius=0.0, + faces_per_pixel=1, + ) + lights = PointLights(device=device, location=[[0.0, 0.0, 5.0]]) + + renderer = MeshRenderer( + rasterizer=MeshRasterizer( 
+ cameras=cameras, + raster_settings=raster_settings + ), + shader=SoftPhongShader( + device=device, + cameras=cameras, + lights=lights, + blend_params=blend_params + ), + ) + # Render RGB (used only for mask) + img = renderer(mesh_p3d).detach().cpu().numpy()[0][:,:,:3].clip(0, 1) * 255 + img = img.astype(np.uint8) + # Depth renderer + class DepthShader(torch.nn.Module): + def __init__(self, device="cpu"): + super().__init__() + self.device = device + def forward(self, fragments, meshes, **kwargs): + return fragments.zbuf + depth_renderer = MeshRenderer( + rasterizer=MeshRasterizer( + cameras=cameras, + raster_settings=raster_settings + ), + shader=DepthShader(device=device) + ) + depth = depth_renderer(mesh_p3d)[0, ..., 0].detach().cpu().numpy() # (H, W) + mask = (depth > 0).astype(np.uint8) + return img, mask, depth + + +def find_nearest_point(point, object_mask): + """ + Given a point and an object binary mask (H,W), return the nearest point in the mask. + """ + H, W = object_mask.shape + ys, xs = np.where(object_mask) + if len(xs) == 0: + return None + dists = np.sqrt((xs - point[0]) ** 2 + (ys - point[1]) ** 2) + min_idx = np.argmin(dists) + return np.array([xs[min_idx], ys[min_idx]]) + +def get_bbox_mask_from_mask(mask): + ys, xs = np.where(mask) + if len(xs) == 0: + return None + x_min = np.min(xs) + x_max = np.max(xs) + y_min = np.min(ys) + y_max = np.max(ys) + return x_min, x_max, y_min, y_max + +def compute_contact_point(kpts_2d, object_mask): + """ + Given 2D keypoints (N,2) and an object binary mask (H,W), return the mean keypoint location [x, y] + of all keypoints that fall inside the mask. + """ + kpts_2d = np.asarray(kpts_2d) + H, W = object_mask.shape + inside = [] + for kp in kpts_2d: + x, y = int(round(kp[0])), int(round(kp[1])) + if 0 <= y < H and 0 <= x < W: + if object_mask[y, x]: + inside.append(kp) + if len(inside) > 0: + point = np.mean(np.stack(inside, axis=0), axis=0) + px, py = int(round(point[0])), int(round(point[1])) + if 0 <= py < H and 0 <= px < W and object_mask[py, px]: + return point, True, True + else: + return point, True, False + else: + return np.mean(kpts_2d, axis=0), False, False + +def bbox_to_mask(bbox): + x, y, w, h = bbox + mask = np.zeros((h, w)) + mask[y:y+h, x:x+w] = True + return mask + + +def visualize_grasp_points(image, kpts_2d, contact_point, output_path): + """ + Simple visualization of fingertip keypoints and contact point on image. 
+ + Args: + image: Input image (H, W, 3) uint8 or float + kpts_2d: Fingertip keypoints array (N, 2) + contact_point: Contact point (2,) + output_path: Path to save visualization + """ + # Prepare image + vis_image = image.copy() + if vis_image.dtype != np.uint8: + if vis_image.max() <= 1.0: + vis_image = (vis_image * 255).astype(np.uint8) + else: + vis_image = vis_image.astype(np.uint8) + + # Convert RGB to BGR for OpenCV + if vis_image.shape[-1] == 3: + vis_image = cv2.cvtColor(vis_image, cv2.COLOR_RGB2BGR) + + H, W = vis_image.shape[:2] + + # Draw fingertip keypoints (blue) + kpts_2d = np.asarray(kpts_2d) + for kp in kpts_2d: + kp_x, kp_y = int(round(kp[0])), int(round(kp[1])) + if 0 <= kp_y < H and 0 <= kp_x < W: + cv2.circle(vis_image, (kp_x, kp_y), 5, (255, 0, 0), -1) # Blue filled circle + + # Draw contact point (red) + cp_x, cp_y = int(round(contact_point[0])), int(round(contact_point[1])) + if 0 <= cp_y < H and 0 <= cp_x < W: + cv2.circle(vis_image, (cp_x, cp_y), 8, (0, 0, 255), -1) # Red filled circle + + # Convert back to RGB and save + vis_image_rgb = cv2.cvtColor(vis_image, cv2.COLOR_BGR2RGB) + imageio.imwrite(str(output_path), vis_image_rgb) + logging.info(f"Saved grasp point visualization to: {output_path}") + + +def grasp_point_generation(keys, key_scene_dicts, key_cfgs): + base_dir = Path.cwd() + for key in keys: + scene_dict = key_scene_dicts[key] + scene_dict["key"] = key # Store key in scene_dict for visualization + key_cfg = key_cfgs[key] + single_grasp_point_generation(scene_dict, key_cfg, key, base_dir) + key_scene_dicts[key] = scene_dict + with open(base_dir / f'outputs/{key}/scene/scene.pkl', 'wb') as f: + pickle.dump(scene_dict, f) + +def single_grasp_point_generation(scene_dict, key_cfg, key, base_dir): + gpu_id = key_cfg["gpu"] + device = f"cuda:{gpu_id}" + start_frame_idx = scene_dict["info"]["start_frame_idx"] + points_2d = None + i = start_frame_idx + manipulated_oid = scene_dict["info"]["manipulated_oid"] + if_close_list = [] + is_inside_list = [] + backup_list = [] + extrinsics = np.array(scene_dict["extrinsics"]).astype(np.float64).reshape(-1, 4, 4) + T_c2w = np.array(scene_dict["info"]["camera"]["camera_opencv_to_world"]).astype(np.float64).reshape(4,4) + T_w2c = np.linalg.inv(T_c2w) + model_path = scene_dict["info"]["objects"][manipulated_oid]["optimized"] + model = trimesh.load(model_path) + + K = np.array(scene_dict["intrinsics"]).astype(np.float32).reshape(3, 3) + img_size = scene_dict["height"], scene_dict["width"] + + traj_key = scene_dict["info"]["traj_key"] + traj_key = traj_key.replace("_recomputed", "") + traj_path = scene_dict["info"]["objects"][manipulated_oid][traj_key] + traj = np.load(traj_path) + traj = traj.reshape(-1, 4, 4) + start_pose = traj[start_frame_idx] + model = model.apply_transform(start_pose) + model = model.apply_transform(T_w2c) + + + images, mask_img, depth = render_pytorch3d_rgbd(model, K, img_size, device=device) + kpts_2d = scene_dict["simulation"]["hand_kpts"][i][[4,8,12,16,20]] + point_2d, is_close, is_inside = compute_contact_point(kpts_2d, mask_img) + + # Use PyTorch3D's unproject_points for accurate 2D-to-3D conversion + x, y = int(round(point_2d[0])), int(round(point_2d[1])) + image = scene_dict["images"][i] + + # Visualize keypoints and contact point + vis_dir = base_dir / f"outputs/{key}/simulation/debug" + vis_dir.mkdir(parents=True, exist_ok=True) + vis_path = vis_dir / f"grasp_point_visualization_frame_{i:06d}.png" + visualize_grasp_points(image, kpts_2d, point_2d, vis_path) + mask_save_path = vis_dir / 
f"mask_frame_{i:06d}.png" + cv2.imwrite(mask_save_path, mask_img * 255) + image_save_path = vis_dir / f"image_frame_{i:06d}.png" + imageio.imwrite(image_save_path, images) + + + + # Create camera object for unprojection (same as in render function) + fx, fy, cx, cy = float(K[0, 0]), float(K[1, 1]), float(K[0, 2]), float(K[1, 2]) + H, W = img_size + + camera = PerspectiveCameras( + device=device, + focal_length=torch.tensor([[fx, fy]], dtype=torch.float32, device=device), + principal_point=torch.tensor([[cx, cy]], dtype=torch.float32, device=device), + image_size=torch.tensor([[H, W]], dtype=torch.float32, device=device), + in_ndc=False, + R=torch.eye(3, dtype=torch.float32, device=device).unsqueeze(0), + T=torch.zeros(1, 3, dtype=torch.float32, device=device), + ) + + # Unproject 2D point to 3D using PyTorch3D + xy_coords = torch.tensor([[x, y]], dtype=torch.float32, device=device) # (1, 2) + depth_values = torch.tensor([[z]], dtype=torch.float32, device=device) # (1, 1) + + # Unproject to camera space (since mesh is already in camera coordinates) + points_camera = camera.unproject_points( + xy_depth=torch.cat([xy_coords, depth_values], dim=1), # (1, 3) [x, y, depth] + world_coordinates=False, # Output in camera coordinates + scaled_depth_input=False, # Depth is in world units + ) + + winner_point_3d = points_camera[0].cpu().numpy() # (3,) in camera coordinates + + winner_point_to_obj = winner_point_3d @ np.linalg.inv(start_pose) + return winner_point_to_obj + + + +if __name__ == "__main__": + base_dir = Path.cwd() + cfg_path = base_dir / "config" / "config.yaml" + cfg = yaml.safe_load(cfg_path.open("r")) + keys = cfg["keys"] + sys.path.append(str(base_dir / "openreal2sim" / "simulation")) + from utils.compose_config import compose_configs + key_cfgs = {key: compose_configs(key, cfg) for key in keys} + print(f"Key cfgs: {key_cfgs}") + key_scene_dicts = {} + for key in keys: + scene_pkl = base_dir / f'outputs/{key}/scene/scene.pkl' + with open(scene_pkl, 'rb') as f: + scene_dict = pickle.load(f) + key_scene_dicts[key] = scene_dict + grasp_point_generation(keys, key_scene_dicts, key_cfgs) + diff --git a/openreal2sim/simulation/modules/hand_preprocess/hand_extraction.py b/openreal2sim/simulation/modules/hand_preprocess/hand_extraction.py new file mode 100755 index 0000000..79b25ed --- /dev/null +++ b/openreal2sim/simulation/modules/hand_preprocess/hand_extraction.py @@ -0,0 +1,213 @@ +## In this file: we use hand-object detector to decide the starting and ending frame of contact. +## Then we use wilor to extract the hand info. 
+ + +import os +import sys +import cv2 +import numpy as np +import torch +import yaml +from typing import List, Tuple, Dict, Optional +import logging +from pathlib import Path +import pickle +import logging +import tqdm +base_dir = Path.cwd() +sys.path.append(str(base_dir / "openreal2sim" / "simulation")) +sys.path.append(str(base_dir / "third_party" / "WiLoR")) +sys.path.append(str(base_dir / "third_party" / "Grounded-SAM-2")) + + +from wilor.utils.renderer import cam_crop_to_full +from wilor.models import load_wilor +from wilor.utils import recursive_to +from wilor.datasets.vitdet_dataset import ViTDetDataset +from sam2.build_sam import build_sam2 +from sam2.sam2_image_predictor import SAM2ImagePredictor +from ultralytics import YOLO + +class WiLoRExtractor: + def __init__(self, + model_path: str, + cfg_path: str, + yolo_weights_path: str, + sam_weights_path: str, + sam_cfg_path: str, + device: str): + self._wilor_model, self._wilor_cfg = load_wilor(model_path, cfg_path) + self._wilor_model.eval() + self._yolo_detector = YOLO(yolo_weights_path) + self.device = torch.device(device) + self._sam_predictor = SAM2ImagePredictor(build_sam2(sam_cfg_path, sam_weights_path)) + + def process(self, images: np.ndarray, batch_size: int = 16, rescale_factor: float = 1.0): + boxes = [] + right = [] + masks = [] + self._wilor_model.to(self.device) + self._yolo_detector.to(self.device) + self._sam_predictor.model.to(self.device) + self._wilor_model.eval() + + all_global_orient = [] + all_kpts = [] + all_masks = [] + + for i in tqdm.tqdm(range(0, len(images)), desc="Detecting hands"): + batch = np.array(images)[i] + with torch.no_grad(): + detections = self._yolo_detector.predict(batch, conf=0.3, verbose=False, save=False, show=False)[0] + + if len(detections.boxes.cls.cpu().detach().numpy()) == 0: + all_global_orient.append(None) + all_kpts.append(None) + all_masks.append(None) + else: + det = max(detections, key=lambda d: d.boxes.conf.cpu().detach().item()) + Bbox = det.boxes.data.cpu().detach().squeeze().numpy() + cls_flag = det.boxes.cls.cpu().detach().squeeze().item() + + self._sam_predictor.set_image(batch) + hand_masks, scores, logits = self._sam_predictor.predict( + point_coords=None, + point_labels=None, + box=np.array([[Bbox[0], Bbox[1], Bbox[2], Bbox[3]]]), + multimask_output=False + ) + hand_masks = hand_masks[0] + hand_masks = hand_masks.astype(bool) + dataset = ViTDetDataset(self._wilor_cfg, batch, np.array([Bbox[:4]]), np.array([cls_flag]), rescale_factor=rescale_factor) + dataloader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=False, num_workers=0) + sign = False + for batch in dataloader: + assert sign == False + batch = recursive_to(batch, torch.device(self.device)) + with torch.no_grad(): + out = self._wilor_model(batch) + multiplier = 2 * batch['right'][0].cpu().numpy() - 1 + pred_cam = out['pred_cam'][0] + pred_cam[1] = multiplier * pred_cam[1] + box_center = batch['box_center'][0].float() + box_size = batch['box_size'][0].float() + img_size = batch['img_size'][0].float() + scaled_focal_length = self._wilor_cfg.EXTRA.FOCAL_LENGTH / self._wilor_cfg.MODEL.IMAGE_SIZE * img_size.max() + #import pdb; pdb.set_trace() + pred_cam_t_full = cam_crop_to_full(pred_cam.reshape(1, 3), box_center.reshape(1, 2), box_size.reshape(1, 1), img_size.reshape(1, 2), scaled_focal_length).detach().cpu().numpy() + batch_size = batch['img'].shape[0] + joints = out['pred_keypoints_3d'][0].detach().cpu().numpy() + joints[:, 0] = multiplier * joints[:, 0] + cam_t = pred_cam_t_full[0] + kpts_2d = 
self.project_full_img(joints, cam_t, float(scaled_focal_length), img_size) + all_kpts.append(kpts_2d) + all_global_orient.append(out['pred_mano_params']['global_orient'][0,0].detach().cpu().numpy()) + all_masks.append(hand_masks) + sign = True + return all_kpts, all_global_orient, all_masks + + + + + + def project_full_img(self, points, cam_trans, focal_length, img_res): + ''' we use simple K here. It works.''' + if not isinstance(points, torch.Tensor): + points = torch.tensor(points, dtype=torch.float32) + if not isinstance(cam_trans, torch.Tensor): + cam_trans = torch.tensor(cam_trans, dtype=torch.float32) + # Ensure numeric image resolution + try: + img_w = float(img_res[0]) + img_h = float(img_res[1]) + except Exception: + # Fallback for unexpected types + img_w, img_h = float(img_res[0].item()), float(img_res[1].item()) + K = torch.eye(3, dtype=torch.float32) + K[0, 0] = float(focal_length) + K[1, 1] = float(focal_length) + K[0, 2] = img_w / 2.0 + K[1, 2] = img_h / 2.0 + pts = points + cam_trans + pts = pts / pts[..., -1:] + V_2d = (K @ pts.T).T + return V_2d[..., :-1].detach().cpu().numpy() + + + + + + +def hand_extraction(keys, key_scene_dicts, key_cfgs): + base_dir = Path.cwd() + model_path = base_dir / "third_party" / "WiLoR" / "pretrained_models" / "wilor_final.ckpt" + cfg_path = base_dir / "third_party" / "WiLoR" / "pretrained_models" / "model_config.yaml" + yolo_weights_path = base_dir / "third_party" / "WiLoR" / "pretrained_models" / "detector.pt" + sam_cfg_path = "configs/sam2.1/sam2.1_hiera_l.yaml" + sam_weights_path = "third_party/Grounded-SAM-2/checkpoints/sam2.1_hiera_large.pt" + + for key in keys: + + scene_dict = key_scene_dicts[key] + config = key_cfgs[key] + gpu_id = config['gpu'] + device = f"cuda:{gpu_id}" + wilor_extractor = WiLoRExtractor(model_path=model_path, cfg_path=cfg_path, yolo_weights_path=yolo_weights_path, sam_weights_path=sam_weights_path, sam_cfg_path=sam_cfg_path, device=device) + #images = np.transpose(scene_dict["images"], (0, 3, 1, 2)) + images = scene_dict["images"].astype(np.float32) + kpts, global_orient, masks = wilor_extractor.process(images) + if scene_dict.get("simulation") is None: + scene_dict["simulation"] = {} + scene_dict["simulation"]["hand_kpts"] = kpts + scene_dict["simulation"]["hand_global_orient"] = global_orient + scene_dict["simulation"]["hand_masks"] = masks + key_scene_dicts[key] = scene_dict + with open(base_dir / f'outputs/{key}/scene/scene.pkl', 'wb') as f: + pickle.dump(scene_dict, f) + + # import cv2 + # start_frame_idx = scene_dict["recon"]["start_frame_idx"] + + # first_img = scene_dict["images"][start_frame_idx] + # first_kpts = kpts[start_frame_idx] + # first_masks = masks[start_frame_idx] + # pts2d = np.asarray(first_kpts).astype(np.int32) + # overlay_img = first_img.copy() + # import pdb; pdb.set_trace() + # for x, y in pts2d: + # cv2.circle(overlay_img, (int(x), int(y)), 4, (0, 255, 0), -1) + # mask_vis = (first_masks.astype(np.uint8) * 255) + # if mask_vis.ndim == 2: + # mask_vis = cv2.cvtColor(mask_vis, cv2.COLOR_GRAY2BGR) + # overlay_alpha = 0.4 + # mask_color = np.zeros_like(overlay_img) + # mask_color[:, :, 2] = mask_vis[:, :, 0] + # overlay_img = cv2.addWeighted(overlay_img, 1.0, mask_color, overlay_alpha, 0) + # save_dir = base_dir / f'outputs/{key}/simulation/' + # save_dir.mkdir(parents=True, exist_ok=True) + # overlay_path = save_dir / "first_frame_hand_kpts_overlay.jpg" + # cv2.imwrite(str(overlay_path), overlay_img) + # print(f"Saved hand keypoints overlay for {key} to {overlay_path}") + return 
key_scene_dicts + + + + +if __name__ == "__main__": + base_dir = Path.cwd() + cfg_path = base_dir / "config" / "config.yaml" + cfg = yaml.safe_load(cfg_path.open("r")) + keys = cfg["keys"] + + from utils.compose_config import compose_configs + key_cfgs = {key: compose_configs(key, cfg) for key in keys} + print(f"Key cfgs: {key_cfgs}") + key_scene_dicts = {} + for key in keys: + scene_pkl = base_dir / f'outputs/{key}/scene/scene.pkl' + with open(scene_pkl, 'rb') as f: + scene_dict = pickle.load(f) + key_scene_dicts[key] = scene_dict + hand_extraction(keys, key_scene_dicts, key_cfgs) + + diff --git a/openreal2sim/simulation/sim_agent.py b/openreal2sim/simulation/sim_agent.py new file mode 100755 index 0000000..486654b --- /dev/null +++ b/openreal2sim/simulation/sim_agent.py @@ -0,0 +1,248 @@ +### isaaclab + +from pathlib import Path +import sys +import pickle +import yaml +import json +import argparse +import shutil +import numpy as np +from isaaclab.app import AppLauncher +import typing +from typing import Optional +sys.path.append(str(Path.cwd() / "IsaacLab")) +sys.path.append(str(Path.cwd() / "openreal2sim" / "simulation")) +from utils.compose_config import compose_configs +from utils.notification import notify_started, notify_failed, notify_success + +class ProprocessAgent: + def __init__(self, stage=None, key=None): + print('[Info] Initializing ProprocessAgent...') + self.base_dir = Path.cwd() + cfg_path = self.base_dir / "config" / "config.yaml" + cfg = yaml.safe_load(cfg_path.open("r")) + self.keys = [key] if key is not None else cfg["keys"] + self.key_cfgs = {key: compose_configs(key, cfg) for key in self.keys} + self.key_scene_dicts = {} + for key in self.keys: + scene_pkl = self.base_dir / f'outputs/{key}/scene/scene.pkl' + with open(scene_pkl, 'rb') as f: + scene_dict = pickle.load(f) + self.key_scene_dicts[key] = scene_dict + self.stages = [ + # "hand_extraction", + #"demo_motion_process", + # "grasp_point_generation", + # "grasp_generation" + + ] + if stage is not None: + if stage in self.stages: + start_idx = self.stages.index(stage) + self.stages = self.stages[start_idx:] + else: + print(f"[Warning] Stage '{stage}' not found. It must be one of the {self.stages}. 
Running all stages by default.") + print('[Info] ProprocessAgent initialized.') + + + + def hand_extraction(self): + from modules.hand_preprocess.hand_extraction import hand_extraction + self.key_scene_dicts = hand_extraction(self.keys, self.key_scene_dicts, self.key_cfgs) + print('[Info] Hand extraction completed.') + + def demo_motion_process(self): + from modules.demo_motion_process import demo_motion_process + self.key_scene_dicts = demo_motion_process(self.keys, self.key_scene_dicts, self.key_cfgs) + print('[Info] Demo motion process completed.') + + # def grasp_point_generation(self): + # from modules.grasp_point_generation import grasp_point_generation + # self.key_scene_dicts = grasp_point_generation(self.keys, self.key_scene_dicts, self.key_cfgs) + # print('[Info] Grasp point generation completed.') + + def grasp_generation(self): + from modules.grasp_preprocess.grasp_generation import grasp_generation + grasp_generation(self.keys) + print('[Info] Grasp proposal generation completed.') + + def run(self): + if "hand_extraction" in self.stages: + self.hand_extraction() + if "demo_motion_process" in self.stages: + self.demo_motion_process() + # if "grasp_point_generation" in self.stages: + # self.grasp_point_generation() + if "grasp_generation" in self.stages: + self.grasp_generation() + + print('[Info] ProprocessAgent run completed.') + return self.key_scene_dicts + + + + + +def create_args_from_config(config_path: Optional[str] = None, config_dict: Optional[dict] = None) -> argparse.Namespace: + """ + Create an argparse.Namespace from a config file (yaml/json) or dictionary. + + Args: + config_path: Path to yaml or json config file + config_dict: Dictionary with config parameters + + Returns: + argparse.Namespace with the config parameters + + Example config file (yaml): + key: "demo_video" + robot: "franka" + num_envs: 4 + num_trials: 10 + teleop_device: "keyboard" + sensitivity: 1.0 + device: "cuda:0" + enable_cameras: true + headless: true + """ + + if config_dict is None: + config_dict = { + 'robot': 'franka', + 'num_envs': 5, + 'num_trials': 10, + 'teleop_device': 'keyboard', + 'sensitivity': 1.0, + 'device': 'cuda:0', + 'enable_cameras': True, + 'headless': True, + } + else: + config_dict = { + 'robot': config_dict['robot'], + 'num_envs': config_dict['num_envs'], + 'num_trials': config_dict['num_trials'], + 'teleop_device': config_dict['teleop_device'], + 'sensitivity': config_dict['sensitivity'], + 'device': config_dict['device'], + 'enable_cameras': config_dict['enable_cameras'], + 'headless': config_dict['headless'], + } + + return argparse.Namespace(**config_dict) + + +class IsaacAgent: + def __init__(self, stage=None, key=None): + print('[Info] Initializing IsaacAgent...') + self.base_dir = Path.cwd() + cfg_path = self.base_dir / "config" / "config.yaml" + cfg = yaml.safe_load(cfg_path.open("r")) + self.keys = [key] if key is not None else cfg["keys"] + self.key_cfgs = {key: compose_configs(key, cfg) for key in self.keys} + self.key_scene_dicts = {} + for key in self.keys: + scene_pkl = self.base_dir / f'outputs/{key}/scene/scene.pkl' + with open(scene_pkl, 'rb') as f: + scene_dict = pickle.load(f) + self.key_scene_dicts[key] = scene_dict + self.stages = [ + # "usd_conversion", + #"grasp_generation", + #"sim_heuristic_manip", + "sim_randomize_rollout", + + ] + if stage is not None: + if stage in self.stages: + start_idx = self.stages.index(stage) + self.stages = self.stages[start_idx:] + else: + print(f"[Warning] Stage '{stage}' not found. 
It must be one of the {self.stages}. Running all stages by default.") + print('[Info] IsaacAgent initialized.') + + def launch_isaaclab(self): + + self.args_cli = create_args_from_config() + #AppLauncher.add_app_launcher_args(self.args_cli) + self.args_cli.enable_cameras = True + self.args_cli.headless = True + self.app_launcher = AppLauncher(vars(self.args_cli)) + self.simulation_app = self.app_launcher.app + return self.simulation_app + + def close_isaaclab(self): + self.simulation_app.close() + + def usd_conversion(self): + from isaaclab.sim_preprocess.usd_conversion import usd_conversion + usd_conversion(self.keys) + print('[Info] USD conversion completed.') + + def sim_heuristic_manip(self): + self.launch_isaaclab() + from isaaclab.sim_heuristic_manip import sim_heuristic_manip + sim_heuristic_manip(self.keys, args_cli=self.args_cli) + print('[Info] Heuristic manipulation simulation completed.') + self.close_isaaclab() + + + def sim_randomize_rollout(self): + self.launch_isaaclab() + from isaaclab.sim_randomize_rollout import sim_randomize_rollout + sim_randomize_rollout(self.keys, args_cli=self.args_cli) + print('[Info] Randomize rollout completed.') + self.close_isaaclab() + + def grasp_generation(self): + from modules.grasp_preprocess.grasp_generation import grasp_generation + grasp_generation(self.keys) + print('[Info] Grasp proposal generation completed.') + + def run(self): + if "usd_conversion" in self.stages: + self.usd_conversion() + if "grasp_generation" in self.stages: + self.grasp_generation() + if "launch_isaaclab" in self.stages: + self.launch_isaaclab() + if "sim_heuristic_manip" in self.stages: + self.sim_heuristic_manip() + + if "sim_randomize_rollout" in self.stages: + self.sim_randomize_rollout() + if "close_isaaclab" in self.stages: + self.close_isaaclab() + +if __name__ == '__main__': + args = argparse.ArgumentParser() + args.add_argument('--stage', type=str, default=None, help='Starting from a certain stage') + args.add_argument('--key', type=str, default=None, help='Process a single key instead of all keys from config') + args.add_argument('--label', type=str, default=None, help='Optional label for notifications') + args = args.parse_args() + + if args.label: + notify_started(args.label) + + try: + agent = ProprocessAgent(stage=args.stage, key=args.key) + scene_dicts = agent.run() + + if args.label: + notify_success(args.label) + except Exception as e: + if args.label: + notify_failed(args.label) + raise + + try: + agent = IsaacAgent(stage=args.stage, key=args.key) + scene_dicts = agent.run() + + if args.label: + notify_success(args.label) + except Exception as e: + if args.label: + notify_failed(args.label) + raise \ No newline at end of file diff --git a/openreal2sim/simulation/utils/compose_config.py b/openreal2sim/simulation/utils/compose_config.py new file mode 100755 index 0000000..89c4410 --- /dev/null +++ b/openreal2sim/simulation/utils/compose_config.py @@ -0,0 +1,17 @@ + +default_config = { + "gpu": "0", +} + + +def compose_configs(key_name: str, config: dict) -> dict: + ret_key_config = {} + local_config = config["local"].get(key_name, {}) + local_config = local_config.get("simulation", {}) + global_config = config.get("global", {}) + global_config = global_config.get("simulation", {}) + for param in default_config.keys(): + value = local_config.get(param, global_config.get(param, default_config[param])) + ret_key_config[param] = value + print(f"[Info] Config for {key_name}: {ret_key_config}") + return ret_key_config \ No newline at end of file diff 
--git a/openreal2sim/simulation/utils/notification.py b/openreal2sim/simulation/utils/notification.py new file mode 100755 index 0000000..a36c546 --- /dev/null +++ b/openreal2sim/simulation/utils/notification.py @@ -0,0 +1,42 @@ +import requests + +ntfy_url = "" + +def notify_success(pipeline_label): + try: + requests.post( + ntfy_url, + data=pipeline_label, + headers={ + "Title": f"Reconstruction completed", + "Tags": "white_check_mark" + } + ) + except: + pass + +def notify_started(pipeline_label): + try: + requests.post( + ntfy_url, + data=pipeline_label, + headers={ + "Title": f"Reconstruction started", + "Tags": "watch" + } + ) + except: + pass + +def notify_failed(pipeline_label): + try: + requests.post( + ntfy_url, + data=pipeline_label, + headers={ + "Title": f"Reconstruction failed", + "Tags": "x" + } + ) + except: + pass \ No newline at end of file From 81cce38cdcffdac0356b0c406d4fb583f3308cd0 Mon Sep 17 00:00:00 2001 From: DynamoDuan <2030524470@qq.com> Date: Sat, 8 Nov 2025 11:13:30 -0800 Subject: [PATCH 2/2] refine --- openreal2sim/simulation/isaaclab/sim_base.py | 11 +- .../isaaclab/sim_heuristic_manip.py | 44 ++--- .../isaaclab/sim_preprocess/usd_conversion.py | 16 +- .../isaaclab/sim_randomize_rollout.py | 50 +++--- openreal2sim/simulation/sim_agent.py | 162 ++++++++---------- openreal2sim/simulation/sim_agent.sh | 136 +++++++++++++++ 6 files changed, 281 insertions(+), 138 deletions(-) create mode 100644 openreal2sim/simulation/sim_agent.sh diff --git a/openreal2sim/simulation/isaaclab/sim_base.py b/openreal2sim/simulation/isaaclab/sim_base.py index fa8a3cd..dfe2906 100755 --- a/openreal2sim/simulation/isaaclab/sim_base.py +++ b/openreal2sim/simulation/isaaclab/sim_base.py @@ -8,7 +8,7 @@ from pathlib import Path from typing import Any, Optional, Dict, Sequence, Tuple from typing import List - +import copy import numpy as np import torch import imageio @@ -689,7 +689,8 @@ def save_data(self, ignore_keys: List[str] = [], env_ids: Optional[List[int]] = if env_ids is None: env_ids = self._all_env_ids.cpu().numpy() - composed_rgb = [] + composed_rgb = copy.deepcopy(stacked["rgb"]) + self.save_dict["composed_rgb"] = composed_rgb hdf5_names = [] for b in env_ids: if self.defined_demo_dir is None: @@ -735,10 +736,10 @@ def save_data(self, ignore_keys: List[str] = [], env_ids: Optional[List[int]] = self.bg_rgb = imageio.imread(bg_rgb_path) for t in range(rgb.shape[0]): #import ipdb; ipdb.set_trace() - composed = self.convert_real(mask[t, b], self.bg_rgb, rgb[t, b]) + composed = self.convert_real(mask[t, b], self.bg_rgb, rgb[t, b]) + self.save_dict["composed_rgb"][t, b] = composed writer.append_data(composed) writer.close() - composed_rgb.append(composed) self.export_batch_data_to_hdf5(hdf5_names) @@ -899,7 +900,7 @@ def export_batch_data_to_hdf5(self, hdf5_names: List[str]) -> int: cam_grp.attrs["resolution"] = resolution if "rgb" in stacked: - rgb_frames = stacked["rgb"][:, env_idx] + rgb_frames = stacked["composed_rgb"][:, env_idx] encoded_frames, max_len = self._encode_rgb_sequence(rgb_frames) dtype = f"S{max_len}" if max_len > 0 else "S1" cam_grp.create_dataset("rgb", data=np.asarray(encoded_frames, dtype=dtype)) diff --git a/openreal2sim/simulation/isaaclab/sim_heuristic_manip.py b/openreal2sim/simulation/isaaclab/sim_heuristic_manip.py index 8719d69..1ccfc3b 100755 --- a/openreal2sim/simulation/isaaclab/sim_heuristic_manip.py +++ b/openreal2sim/simulation/isaaclab/sim_heuristic_manip.py @@ -20,19 +20,19 @@ sys.path.append(str(file_path.parent.parent)) # ─────────── CLI 
─────────── -# parser = argparse.ArgumentParser("sim_policy") -# parser.add_argument("--key", type=str, default="demo_video", help="scene key (outputs/)") -# parser.add_argument("--robot", type=str, default="franka") -# parser.add_argument("--num_envs", type=int, default=1) -# parser.add_argument("--num_trials", type=int, default=1) -# parser.add_argument("--teleop_device", type=str, default="keyboard") -# parser.add_argument("--sensitivity", type=float, default=1.0) -# AppLauncher.add_app_launcher_args(parser) -# args_cli = parser.parse_args() -# args_cli.enable_cameras = True -# args_cli.headless = True # headless mode for batch execution -# app_launcher = AppLauncher(vars(args_cli)) -# simulation_app = app_launcher.app +parser = argparse.ArgumentParser("sim_policy") +parser.add_argument("--key", type=str, default="demo_video", help="scene key (outputs/)") +parser.add_argument("--robot", type=str, default="franka") +parser.add_argument("--num_envs", type=int, default=1) +parser.add_argument("--num_trials", type=int, default=1) +parser.add_argument("--teleop_device", type=str, default="keyboard") +parser.add_argument("--sensitivity", type=float, default=1.0) +AppLauncher.add_app_launcher_args(parser) +args_cli = parser.parse_args() +args_cli.enable_cameras = True +args_cli.headless = True # headless mode for batch execution +app_launcher = AppLauncher(vars(args_cli)) +simulation_app = app_launcher.app # ─────────── Runtime imports ─────────── import isaaclab.sim as sim_utils @@ -829,7 +829,8 @@ def sim_heuristic_manip(keys: list[str], args_cli: argparse.Namespace, config_pa success = True task_cfg = my_sim.from_data_to_task_cfg(key) my_sim.task_cfg = task_cfg - my_sim.trajectory_cfg_list = [task_cfg.reference_trajectory] + assert task_cfg.reference_trajectory is not None + my_sim.traj_cfg_list = [task_cfg.reference_trajectory] my_sim.save_data(ignore_keys=["segmask", "depth"], env_ids=success_ids[:1]) break # actions = np.load("outputs/lab16/demos/demo_5/env_000/actions.npy") @@ -838,11 +839,14 @@ def sim_heuristic_manip(keys: list[str], args_cli: argparse.Namespace, config_pa print("[ERR] no successful environments!") env.close() #simulation_app.close() - return True + return True +def main(): + base_dir = Path.cwd() + cfg = yaml.safe_load((base_dir / "config" / "config.yaml").open("r")) + keys = cfg["keys"] + sim_heuristic_manip(keys, args_cli) - -#if __name__ == "__main__": - #main() - #os.system("quit()") - #simulation_app.close() +if __name__ == "__main__": + main() + simulation_app.close() diff --git a/openreal2sim/simulation/isaaclab/sim_preprocess/usd_conversion.py b/openreal2sim/simulation/isaaclab/sim_preprocess/usd_conversion.py index 1056fcd..2e98438 100755 --- a/openreal2sim/simulation/isaaclab/sim_preprocess/usd_conversion.py +++ b/openreal2sim/simulation/isaaclab/sim_preprocess/usd_conversion.py @@ -9,7 +9,7 @@ AppLauncher.add_app_launcher_args(parser) args_isaaclab = parser.parse_args() # Force GUI unless you toggle via IsaacLab flags -args_isaaclab.headless = False +args_isaaclab.headless = True app_launcher = AppLauncher(args_isaaclab) simulation_app = app_launcher.app @@ -378,9 +378,11 @@ def count_rigid_api(usd_path): ######################################################## ## Per-key conversion (batch) ######################################################## -def run_conversion_for_key(key: str): +from typing import Optional +def run_conversion_for_key(key: str, args_cli:Optional[argparse.Namespace] = None): global args - + if args_cli is None: + args_cli = 
args_isaaclab scene_json = out_dir / key / "simulation" / "scene.json" assert scene_json.exists(), f"[{key}] scene.json not found: {scene_json}" scene_dict = json.load(open(scene_json, "r")) @@ -401,7 +403,7 @@ def run_conversion_for_key(key: str): args = Args(input=input_list, output=output_list, make_instanceable=False, collision_approximation="convexDecomposition", mass=1.0, disable_gravity=False, kinematic_enabled=False, - headless=args_isaaclab.headless, exit_on_finish=True) + headless=args_cli.headless, exit_on_finish=True) # Run the original main conversion logic for this key log.info(f"[{key}] converting {len(args.input)} assets...") @@ -553,11 +555,11 @@ def run_conversion(scene_dict: dict): log.info(f"Saved USD file to {os.path.abspath(output_path)}") -def usd_conversion(keys): +def usd_conversion(keys, args_cli): for key in keys: log.info(f"\n========== [USD Convert] key: {key} ==========") - run_conversion_for_key(key) - simulation_app.close() + run_conversion_for_key(key, args_cli) + #simulation_app.close() print('[Info] USD conversion completed.') ######################################################## diff --git a/openreal2sim/simulation/isaaclab/sim_randomize_rollout.py b/openreal2sim/simulation/isaaclab/sim_randomize_rollout.py index 530995a..4156ddd 100755 --- a/openreal2sim/simulation/isaaclab/sim_randomize_rollout.py +++ b/openreal2sim/simulation/isaaclab/sim_randomize_rollout.py @@ -13,8 +13,8 @@ import sys from isaaclab.app import AppLauncher from typing import Optional, List -from modules.envs.task_cfg import TaskCfg, TaskType, SuccessMetric, SuccessMetricType, TrajectoryCfg -from modules.envs.task_construct import construct_task_config, add_reference_trajectory, load_task_cfg +from modules.envs. task_cfg import TaskCfg, TaskType, SuccessMetric, SuccessMetricType, TrajectoryCfg +from modules.envs.task_construct import construct_task_config, add_reference_trajectory, load_task_cfg, add_generated_trajectories from modules.envs.randomizer import Randomizer file_path = Path(__file__).resolve() import imageio @@ -23,19 +23,19 @@ sys.path.append(str(file_path.parent.parent)) # ─────────── CLI ─────────── -# parser = argparse.ArgumentParser("sim_policy") -# parser.add_argument("--key", type=str, default="demo_video", help="scene key (outputs/)") -# parser.add_argument("--robot", type=str, default="franka") -# parser.add_argument("--num_envs", type=int, default=1) -# parser.add_argument("--num_trials", type=int, default=1) -# parser.add_argument("--teleop_device", type=str, default="keyboard") -# parser.add_argument("--sensitivity", type=float, default=1.0) -# AppLauncher.add_app_launcher_args(parser) -# args_cli = parser.parse_args() -# args_cli.enable_cameras = True -# args_cli.headless = False # headless mode for batch execution -# app_launcher = AppLauncher(vars(args_cli)) -# simulation_app = app_launcher.app +parser = argparse.ArgumentParser("sim_policy") +parser.add_argument("--key", type=str, default="demo_video", help="scene key (outputs/)") +parser.add_argument("--robot", type=str, default="franka") +parser.add_argument("--num_envs", type=int, default=1) +parser.add_argument("--num_trials", type=int, default=1) +parser.add_argument("--teleop_device", type=str, default="keyboard") +parser.add_argument("--sensitivity", type=float, default=1.0) +AppLauncher.add_app_launcher_args(parser) +args_cli = parser.parse_args() +args_cli.enable_cameras = True +args_cli.headless = False # headless mode for batch execution +app_launcher = AppLauncher(vars(args_cli)) 
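
# Not part of the patch: sim_randomize_rollout() further down reads args_cli.total_num (it
# replaces the previously hard-coded total_require_traj_num = 50) and args_cli.num_envs, but
# the CLI block above only defines --num_envs. A minimal sketch of the missing flag, assuming
# the old value of 50 as the default; it would need to be registered before parse_args():

parser.add_argument("--total_num", type=int, default=50,
                    help="number of successful randomized trajectories to collect per key")
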
+simulation_app = app_launcher.app # ─────────── Runtime imports ─────────── import isaaclab.sim as sim_utils @@ -551,7 +551,7 @@ def run_batch_trajectory(self, traj_cfg_list: List[TrajectoryCfg]): def sim_randomize_rollout(keys: list[str], args_cli: argparse.Namespace): for key in keys: success_trajectory_config_list = [] - total_require_traj_num = 50 + total_require_traj_num = args_cli.total_num task_json_path = BASE_DIR / "tasks" / key / "task.json" task_cfg = load_task_cfg(task_json_path) randomizer = Randomizer(task_cfg) @@ -564,7 +564,7 @@ def sim_randomize_rollout(keys: list[str], args_cli: argparse.Namespace): data_dir = BASE_DIR / "data" / key current_timestep = 0 env, _ = make_env( - cfgs=sim_cfgs, num_envs=3, + cfgs=sim_cfgs, num_envs=args_cli.num_envs, device=args_cli.device, bg_simplify=False, ) @@ -575,8 +575,8 @@ def sim_randomize_rollout(keys: list[str], args_cli: argparse.Namespace): my_sim = RandomizeExecution(sim, scene, sim_cfgs=sim_cfgs, demo_dir=demo_dir, data_dir=data_dir, record=True, args_cli=args_cli, bg_rgb=bg_rgb) my_sim.task_cfg = task_cfg while len(success_trajectory_config_list) < total_require_traj_num: - traj_cfg_list = random_task_cfg_list[current_timestep: current_timestep + 3] - current_timestep += 3 + traj_cfg_list = random_task_cfg_list[current_timestep: current_timestep + args_cli.num_envs] + current_timestep += args_cli.num_envs success_env_ids = my_sim.run_batch_trajectory(traj_cfg_list) @@ -594,7 +594,17 @@ def sim_randomize_rollout(keys: list[str], args_cli: argparse.Namespace): # success_env_ids = my_sim.inference() # del my_sim # torch.cuda.empty_cache() - + add_generated_trajectories(task_cfg, success_trajectory_config_list, task_json_path.parent) return success_trajectory_config_list +def main(): + base_dir = Path.cwd() + cfg = yaml.safe_load((base_dir / "config" / "config.yaml").open("r")) + keys = cfg["keys"] + sim_randomize_rollout(keys, args_cli) + + +if __name__ == "__main__": + main() + simulation_app.close() \ No newline at end of file diff --git a/openreal2sim/simulation/sim_agent.py b/openreal2sim/simulation/sim_agent.py index 486654b..fc5898b 100755 --- a/openreal2sim/simulation/sim_agent.py +++ b/openreal2sim/simulation/sim_agent.py @@ -34,7 +34,8 @@ def __init__(self, stage=None, key=None): # "hand_extraction", #"demo_motion_process", # "grasp_point_generation", - # "grasp_generation" + # "grasp_generation", + # "usd_generation" ] if stage is not None: @@ -133,87 +134,87 @@ def create_args_from_config(config_path: Optional[str] = None, config_dict: Opti return argparse.Namespace(**config_dict) -class IsaacAgent: - def __init__(self, stage=None, key=None): - print('[Info] Initializing IsaacAgent...') - self.base_dir = Path.cwd() - cfg_path = self.base_dir / "config" / "config.yaml" - cfg = yaml.safe_load(cfg_path.open("r")) - self.keys = [key] if key is not None else cfg["keys"] - self.key_cfgs = {key: compose_configs(key, cfg) for key in self.keys} - self.key_scene_dicts = {} - for key in self.keys: - scene_pkl = self.base_dir / f'outputs/{key}/scene/scene.pkl' - with open(scene_pkl, 'rb') as f: - scene_dict = pickle.load(f) - self.key_scene_dicts[key] = scene_dict - self.stages = [ - # "usd_conversion", - #"grasp_generation", - #"sim_heuristic_manip", - "sim_randomize_rollout", - - ] - if stage is not None: - if stage in self.stages: - start_idx = self.stages.index(stage) - self.stages = self.stages[start_idx:] - else: - print(f"[Warning] Stage '{stage}' not found. It must be one of the {self.stages}. 
Running all stages by default.") - print('[Info] IsaacAgent initialized.') +# class IsaacAgent: +# def __init__(self, stage=None, key=None): +# print('[Info] Initializing IsaacAgent...') +# self.base_dir = Path.cwd() +# cfg_path = self.base_dir / "config" / "config.yaml" +# cfg = yaml.safe_load(cfg_path.open("r")) +# self.keys = [key] if key is not None else cfg["keys"] +# self.key_cfgs = {key: compose_configs(key, cfg) for key in self.keys} +# self.key_scene_dicts = {} +# for key in self.keys: +# scene_pkl = self.base_dir / f'outputs/{key}/scene/scene.pkl' +# with open(scene_pkl, 'rb') as f: +# scene_dict = pickle.load(f) +# self.key_scene_dicts[key] = scene_dict +# self.stages = [ +# # "usd_conversion", +# #"grasp_generation", +# #"sim_heuristic_manip", +# "sim_randomize_rollout", + +# ] +# if stage is not None: +# if stage in self.stages: +# start_idx = self.stages.index(stage) +# self.stages = self.stages[start_idx:] +# else: +# print(f"[Warning] Stage '{stage}' not found. It must be one of the {self.stages}. Running all stages by default.") +# print('[Info] IsaacAgent initialized.') - def launch_isaaclab(self): - - self.args_cli = create_args_from_config() - #AppLauncher.add_app_launcher_args(self.args_cli) - self.args_cli.enable_cameras = True - self.args_cli.headless = True - self.app_launcher = AppLauncher(vars(self.args_cli)) - self.simulation_app = self.app_launcher.app - return self.simulation_app +# def launch_isaaclab(self): + +# self.args_cli = create_args_from_config() +# #AppLauncher.add_app_launcher_args(self.args_cli) +# self.args_cli.enable_cameras = True +# self.args_cli.headless = True +# self.app_launcher = AppLauncher(vars(self.args_cli)) +# self.simulation_app = self.app_launcher.app +# return self.simulation_app - def close_isaaclab(self): - self.simulation_app.close() +# def close_isaaclab(self): +# self.simulation_app.close() - def usd_conversion(self): - from isaaclab.sim_preprocess.usd_conversion import usd_conversion - usd_conversion(self.keys) - print('[Info] USD conversion completed.') +# def usd_conversion(self): +# from isaaclab.sim_preprocess.usd_conversion import usd_conversion +# usd_conversion(self.keys) +# print('[Info] USD conversion completed.') - def sim_heuristic_manip(self): - self.launch_isaaclab() - from isaaclab.sim_heuristic_manip import sim_heuristic_manip - sim_heuristic_manip(self.keys, args_cli=self.args_cli) - print('[Info] Heuristic manipulation simulation completed.') - self.close_isaaclab() +# def sim_heuristic_manip(self): +# self.launch_isaaclab() +# from isaaclab.sim_heuristic_manip import sim_heuristic_manip +# sim_heuristic_manip(self.keys, args_cli=self.args_cli) +# print('[Info] Heuristic manipulation simulation completed.') +# self.close_isaaclab() - def sim_randomize_rollout(self): - self.launch_isaaclab() - from isaaclab.sim_randomize_rollout import sim_randomize_rollout - sim_randomize_rollout(self.keys, args_cli=self.args_cli) - print('[Info] Randomize rollout completed.') - self.close_isaaclab() - - def grasp_generation(self): - from modules.grasp_preprocess.grasp_generation import grasp_generation - grasp_generation(self.keys) - print('[Info] Grasp proposal generation completed.') +# def sim_randomize_rollout(self): +# self.launch_isaaclab() +# from isaaclab.sim_randomize_rollout import sim_randomize_rollout +# sim_randomize_rollout(self.keys, args_cli=self.args_cli) +# print('[Info] Randomize rollout completed.') +# self.close_isaaclab() + +# def grasp_generation(self): +# from modules.grasp_preprocess.grasp_generation 
import grasp_generation +# grasp_generation(self.keys) +# print('[Info] Grasp proposal generation completed.') - def run(self): - if "usd_conversion" in self.stages: - self.usd_conversion() - if "grasp_generation" in self.stages: - self.grasp_generation() - if "launch_isaaclab" in self.stages: - self.launch_isaaclab() - if "sim_heuristic_manip" in self.stages: - self.sim_heuristic_manip() - - if "sim_randomize_rollout" in self.stages: - self.sim_randomize_rollout() - if "close_isaaclab" in self.stages: - self.close_isaaclab() +# def run(self): +# if "usd_conversion" in self.stages: +# self.usd_conversion() +# if "grasp_generation" in self.stages: +# self.grasp_generation() +# if "launch_isaaclab" in self.stages: +# self.launch_isaaclab() +# if "sim_heuristic_manip" in self.stages: +# self.sim_heuristic_manip() + +# if "sim_randomize_rollout" in self.stages: +# self.sim_randomize_rollout() +# if "close_isaaclab" in self.stages: +# self.close_isaaclab() if __name__ == '__main__': args = argparse.ArgumentParser() @@ -235,14 +236,3 @@ def run(self): if args.label: notify_failed(args.label) raise - - try: - agent = IsaacAgent(stage=args.stage, key=args.key) - scene_dicts = agent.run() - - if args.label: - notify_success(args.label) - except Exception as e: - if args.label: - notify_failed(args.label) - raise \ No newline at end of file diff --git a/openreal2sim/simulation/sim_agent.sh b/openreal2sim/simulation/sim_agent.sh new file mode 100644 index 0000000..ae79bae --- /dev/null +++ b/openreal2sim/simulation/sim_agent.sh @@ -0,0 +1,136 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" + +# Default parameters +KEY="demo_video" +STAGE="" +NUM_ENVS=1 +NUM_TRIALS=1 +DEVICE="cuda:0" +HEADLESS="--headless" +TOTAL_NUM=10 + +usage() { + cat <<'EOF' +Usage: sim_agent.sh [options] + +Options: + --key Scene key to process (default: demo_video) + --stage Starting stage: usd_conversion | sim_heuristic_manip | sim_randomize_rollout + --num_envs Number of parallel environments for heuristic/randomize (default: 1) + --num_trials Number of trials for heuristic stage (default: 1) + --device Device for simulations, e.g. cuda:0 (default: cuda:0) + --no-headless Disable headless mode (passes through to AppLauncher) + -h, --help Show this message and exit + +The script runs the following stages in order (starting from --stage if provided): + 1. USD conversion (`isaaclab/sim_preprocess/usd_conversion.py`) + 2. Heuristic manipulation (`isaaclab/sim_heuristic_manip.py`) + 3. 
Randomized rollout (`isaaclab/sim_randomize_rollout.py`) +EOF +} + +# Parse arguments +while [[ $# -gt 0 ]]; do + case "$1" in + --key) + KEY="${2:?Missing value for --key}" + shift 2 + ;; + --stage) + STAGE="${2:?Missing value for --stage}" + shift 2 + ;; + --num_envs) + NUM_ENVS="${2:?Missing value for --num_envs}" + shift 2 + ;; + --num_trials) + NUM_TRIALS="${2:?Missing value for --num_trials}" + shift 2 + ;; + --device) + DEVICE="${2:?Missing value for --device}" + shift 2 + ;; + --no-headless) + HEADLESS="" + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "[ERR] Unknown option: $1" >&2 + usage >&2 + exit 1 + ;; + esac +done + +# Determine which stages to run +declare -a PIPELINE=("usd_conversion" "sim_heuristic_manip" "sim_randomize_rollout") +declare -a RUN_STAGES=() +if [[ -z "${STAGE}" ]]; then + RUN_STAGES=("${PIPELINE[@]}") +else + found="" + for stage in "${PIPELINE[@]}"; do + if [[ -z "${found}" && "${stage}" == "${STAGE}" ]]; then + found="yes" + fi + if [[ -n "${found}" ]]; then + RUN_STAGES+=("${stage}") + fi + done + if [[ ${#RUN_STAGES[@]} -eq 0 ]]; then + echo "[ERR] Invalid --stage '${STAGE}'. Valid options: ${PIPELINE[*]}" >&2 + exit 1 + fi +fi + +export PYTHONPATH="${ROOT_DIR}:${PYTHONPATH:-}" +cd "${ROOT_DIR}" + +# Stage entry points live under the isaaclab/ directory next to this script. +run_stage() { + local stage="$1" + echo "==============================" + echo "[RUN] Stage: ${stage}" + echo "==============================" + case "${stage}" in + usd_conversion) + python "${SCRIPT_DIR}/isaaclab/sim_preprocess/usd_conversion.py" ${HEADLESS} + ;; + sim_heuristic_manip) + python "${SCRIPT_DIR}/isaaclab/sim_heuristic_manip.py" \ + --key "${KEY}" \ + --num_envs "${NUM_ENVS}" \ + --num_trials "${NUM_TRIALS}" \ + --device "${DEVICE}" \ + ${HEADLESS} + ;; + sim_randomize_rollout) + python "${SCRIPT_DIR}/isaaclab/sim_randomize_rollout.py" \ + --key "${KEY}" \ + --num_envs "${NUM_ENVS}" \ + --device "${DEVICE}" \ + --total_num "${TOTAL_NUM}" \ + ${HEADLESS} + ;; + *) + echo "[ERR] Unsupported stage '${stage}'" >&2 + exit 1 + ;; + esac +} + +for stage in "${RUN_STAGES[@]}"; do + run_stage "${stage}" +done + +echo "[DONE] Pipeline completed: ${RUN_STAGES[*]}" +
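
For reference, the per-key configuration resolution introduced in utils/compose_config.py (and used by every __main__ block in this patch) can be exercised on its own. The sketch below is not part of the patch: it uses a hypothetical in-memory config instead of config/config.yaml, assumes only the default "gpu" parameter defined in default_config, and assumes openreal2sim/simulation is on sys.path as sim_agent.py arranges.

    from utils.compose_config import compose_configs

    cfg = {
        "keys": ["demo_video", "other_key"],
        "global": {"simulation": {"gpu": "1"}},
        "local": {"demo_video": {"simulation": {"gpu": "2"}}},
    }

    # per-key value wins over the global one, which wins over default_config
    assert compose_configs("demo_video", cfg) == {"gpu": "2"}
    # keys without a local override fall back to the global simulation block
    assert compose_configs("other_key", cfg) == {"gpu": "1"}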