Skip to content
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
0112c69
update vln yaml; fix import agent
kew6688 Nov 3, 2025
05ea2a3
update habitat, using evaluator and config; env and agent is WIP
kew6688 Nov 10, 2025
9902401
add distributed_base evaluator
kew6688 Nov 11, 2025
0d00014
Habitat env applied, distributed evaluator applied; clean evaluator a…
kew6688 Nov 12, 2025
7e25e72
fix observation issues
kew6688 Nov 12, 2025
2b0eb8b
update new register name; tiny fix on style
kew6688 Nov 12, 2025
b414ba3
latest tested
kew6688 Nov 12, 2025
99adf73
delete temp agent; rename default evaluator for habitat
kew6688 Nov 12, 2025
75b38a7
update slurm bash
kew6688 Nov 12, 2025
dcf7ee5
merge to main
kew6688 Nov 12, 2025
08bb9c3
update readme
kew6688 Nov 12, 2025
cde84b3
fix init dist print
kew6688 Nov 13, 2025
c89723d
fix eval config; fix local rank to rank
kew6688 Nov 13, 2025
7836276
update init distributed mode if condition
kew6688 Nov 13, 2025
dac13e1
update dist for dlc
kew6688 Nov 13, 2025
d8734c7
fix bug in evaluator
kew6688 Nov 13, 2025
bfe3616
update distributed vln multi, episode loader
kew6688 Nov 15, 2025
43957f6
cma tested
kew6688 Nov 17, 2025
a9ca15d
cma tested; episode loader; torchrun; local agent tested; TODO fix is…
kew6688 Nov 17, 2025
6e93ba0
add vlnpe distributed script
kew6688 Nov 17, 2025
b4af0ce
add grscene; add new result write and resumable load data feature base…
kew6688 Nov 18, 2025
ae33f09
fix bugs in evaluator and dataset for distributed; n1 and rdp tested
kew6688 Nov 19, 2025
53258b1
fix comm log concurrency issue, create file with exist true
kew6688 Nov 21, 2025
50b1f24
fix progress log mkdir race condition
kew6688 Nov 21, 2025
8665b38
fix comments
kew6688 Nov 25, 2025
7d5daa6
polish existing configs and bash
kew6688 Nov 25, 2025
4f67d9b
update bash align with doc
kew6688 Nov 25, 2025
d7e12e9
fix CI test
kew6688 Nov 25, 2025
4eff8f9
rename habitat_extensions
kew6688 Nov 26, 2025
b4a1998
fix comments
kew6688 Nov 27, 2025
c6cf34d
remove useless line
kew6688 Nov 27, 2025
f74a79b
[file] Update 3D Printing Files for Camera of Unitree Go2 (#176)
yuqiang-yang Nov 27, 2025
b34f29a
fix visualize image size; fix unused comment code; fixed evaluator name
kew6688 Dec 1, 2025
ea73075
bump to version v0.0.2
kew6688 Dec 1, 2025
7a23b72
Merge branch 'main' into vlnpe_refactor
kew6688 Dec 1, 2025
8082292
update vlnmulti to VLN; update habitatVln to HabitatVLN
kew6688 Dec 1, 2025
da06d22
Revert "Merge branch 'main' into vlnpe_refactor"
kew6688 Dec 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
221 changes: 130 additions & 91 deletions internnav/agent/internvla_n1_agent.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions internnav/configs/agent/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

class AgentCfg(BaseModel):
server_host: str = 'localhost'
server_port: int = 5000
server_port: int = 8087
model_name: str
ckpt_path: str
ckpt_path: str = None
model_settings: Dict[str, Any]


Expand Down
6 changes: 3 additions & 3 deletions internnav/configs/evaluator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ class EvalCfg(BaseModel):
eval_type: Optional[str] = None
eval_settings: Optional[Dict[str, Any]] = {}
agent: Optional[AgentCfg] = None
env: EnvCfg
task: TaskCfg
dataset: EvalDatasetCfg
env: EnvCfg = None
task: TaskCfg = None
dataset: EvalDatasetCfg = None


__all__ = [
Expand Down
23 changes: 23 additions & 0 deletions internnav/env/internutopia_env.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import os
import sys
from typing import Any, Dict, List

from internnav.configs.evaluator import EnvCfg, TaskCfg
from internnav.env import base
from internnav.env.utils.episode_loader import (
ResumablePathKeyEpisodeloader,
generate_vln_episode,
)


@base.Env.register('internutopia')
Expand All @@ -22,6 +28,23 @@ def __init__(self, env_config: EnvCfg, task_config: TaskCfg):
super().__init__(env_config, task_config)
env_settings = self.env_config.env_settings
task_settings = self.task_config.task_settings

# generate episodes
self.episode_loader = ResumablePathKeyEpisodeloader(
env_settings['dataset'].dataset_type,
**env_settings['dataset'].dataset_settings,
rank=env_settings['rank'],
world_size=env_settings['world_size']
)
self.episodes = generate_vln_episode(self.episode_loader, task_config)
if len(self.episodes) == 0:
print("No episodes found for the given configuration.")
sys.exit(0)
task_settings.update({'episodes': self.episodes})

# set visible device for isaac sim
os.environ["CUDA_VISIBLE_DEVICES"] = str(env_settings.get('local_rank', 0))

config = Config(
simulator=SimConfig(**env_settings),
env_num=task_settings['env_num'],
Expand Down
2 changes: 2 additions & 0 deletions internnav/env/utils/episode_loader/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .generate_episode import generate_vln_episode
from .resumable import ResumablePathKeyEpisodeloader
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
from internnav.evaluator.utils.common import load_data
from .dataset_utils import load_data, revise_one_data, skip_list

from .data_reviser import revise_one_data, skip_list


class BasePathKeyDataloader:
class BasePathKeyEpisodeloader:
def __init__(
self,
dataset_type,
Expand All @@ -13,7 +11,15 @@ def __init__(
filter_same_trajectory,
revise_data=True,
filter_stairs=True,
rank=0,
world_size=1,
):
# current supported dataset types in InternUtopia
# only kujiale has special scene path
# others type should be considered the same as mp3d in loading
allowed = ('R2RVLN', 'mp3d', 'kujiale', 'grscene')
assert dataset_type in allowed, f"Unsupported dataset type: {dataset_type}. Allowed: {allowed}"

self.path_key_data = {}
self.path_key_scan = {}
self.path_key_split = {}
Expand All @@ -25,14 +31,19 @@ def __init__(
filter_same_trajectory=filter_same_trajectory,
filter_stairs=filter_stairs,
dataset_type=dataset_type,
rank=rank,
world_size=world_size,
)
for scan, path_list in load_data_map.items():
for path in path_list:
trajectory_id = path['trajectory_id']
if revise_data:

# tiny revision for R2R dataset in MP3D to fit vlnpe task
if dataset_type == 'mp3d' and revise_data:
if trajectory_id in skip_list:
continue
path = revise_one_data(path)

episode_id = path['episode_id']
path_key = f'{trajectory_id}_{episode_id}'
path['start_position'] += robot_offset
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
import copy
import gzip
import json
import os
from collections import defaultdict

import numpy as np

from internnav.utils.common_log_util import common_logger as log

fall_path_z_0_3 = [
70,
121,
Expand Down Expand Up @@ -322,8 +332,7 @@
2306,
]

skip_list = [
]
skip_list = []

fall_path_custom = {
6558: [-1, 0, 0],
Expand Down Expand Up @@ -447,3 +456,112 @@ def revise_one_data(origin):
origin['reference_path'][0][1] = origin['reference_path'][0][1] + amend_offset[1]
origin['reference_path'][0][2] = origin['reference_path'][0][2] + amend_offset[2]
return origin


def transform_rotation_z_90degrees(rotation):
    """Compose *rotation* (a [w, x, y, z] quaternion) with a +90-degree yaw.

    The half-angle pi/4 appears because a unit quaternion encodes half the
    rotation angle: cos(pi/4) + sin(pi/4)*k is a pi/2 turn about the z axis.

    Returns the product quaternion as a list [w, x, y, z].
    """
    qw, qx, qy, qz = rotation
    # Quaternion for a 90-degree rotation about z (x and y components are zero).
    rw = np.cos(np.pi / 4)
    rx = 0
    ry = 0
    rz = np.sin(np.pi / 4)
    # Hamilton product: rotation * z_rot_90.
    return [
        qw * rw - qx * rx - qy * ry - qz * rz,  # w
        qw * rx + qx * rw + qy * rz - qz * ry,  # x
        qw * ry - qx * rz + qy * rw + qz * rx,  # y
        qw * rz + qx * ry - qy * rx + qz * rw,  # z
    ]


def has_stairs(item, height_threshold=0.3):
    """Return True when the episode mentions stairs and its reference path
    actually contains a vertical jump of at least *height_threshold*.

    Episodes whose instruction text does not contain the word 'stair' are
    never flagged, regardless of path geometry.
    """
    if 'stair' not in item['instruction']['instruction_text']:
        return False
    reference_path = item['reference_path']
    previous_height = reference_path[0][-1]
    for position in reference_path[1:]:
        # A step of height_threshold or more counts as a staircase.
        if abs(position[-1] - previous_height) >= height_threshold:
            return True
        previous_height = position[-1]
    return False


def different_height(item, height_threshold=0.3):
    """Return True when any two consecutive reference-path waypoints differ
    in z (index 2) by more than *height_threshold*.

    Args:
        item: episode dict carrying a 'reference_path' list of [x, y, z]
            waypoints.
        height_threshold: maximum allowed step height; defaults to the
            previously hard-coded 0.3, and mirrors the parameter of
            ``has_stairs`` for consistency.

    Returns:
        bool: True when the path changes height, False otherwise (including
        empty or single-waypoint paths).
    """
    paths = item['reference_path']
    # Compare each waypoint with its successor; strictly greater than the
    # threshold, matching the original `> 0.3` comparison.
    return any(
        abs(nxt[2] - cur[2]) > height_threshold
        for cur, nxt in zip(paths, paths[1:])
    )


def load_data(
    dataset_root_dir, split, filter_same_trajectory=True, filter_stairs=True, dataset_type='mp3d', rank=0, world_size=1
):
    """Load and preprocess VLN episodes for one distributed worker.

    Episodes are sharded across workers by striding the raw episode list
    with ``[rank::world_size]``, grouped by scene, converted from the MP3D
    (x, z, y) convention to (x, -y, z) when ``dataset_type == 'mp3d'``, and
    then optionally filtered.

    Args:
        dataset_root_dir: directory containing ``<split>/<split>.json.gz``.
        split: dataset split name (e.g. 'val_seen').
        filter_same_trajectory: keep only the first episode per trajectory_id.
        filter_stairs: drop episodes whose path mentions stairs or changes
            height (see ``has_stairs`` / ``different_height``).
        dataset_type: dataset family; 'kujiale' and 'grscene' episodes carry
            a 'scan' key, other types carry a 'scene_id' path.
        rank: this worker's rank in the distributed group.
        world_size: total number of workers.

    Returns:
        dict mapping scan name -> list of episode dicts.
    """
    with gzip.open(os.path.join(dataset_root_dir, split, f"{split}.json.gz"), 'rt', encoding='utf-8') as f:
        data = json.load(f)['episodes'][rank::world_size]

    if dataset_type in ['kujiale', 'grscene']:
        scenes = list(set([x['scan'] for x in data]))
    else:
        scenes = list(set([x['scene_id'] for x in data]))  # e.g. 'mp3d/zsNo4HB9uLZ/zsNo4HB9uLZ.glb'

    scenes.sort()
    new_data = {}
    for scene in scenes:
        if dataset_type in ['kujiale', 'grscene']:
            scene_data = [x for x in data if x['scan'] == scene]
            scan = scene
        else:
            scene_data = [x for x in data if x['scene_id'] == scene]
            scan = scene.split('/')[1]  # e.g. 'zsNo4HB9uLZ'
        new_scene_data = []
        for item in scene_data:
            new_item = copy.deepcopy(item)
            new_item['scan'] = scan
            # Preserve the raw pose so callers can recover the original frame.
            new_item['original_start_position'] = item['start_position']
            new_item['original_start_rotation'] = item['start_rotation']
            if dataset_type == 'mp3d':
                # MP3D stores (x, z, y); convert to the simulator's (x, -y, z).
                x, z, y = item['start_position']
                new_item['start_position'] = [x, -y, z]
                r1, r2, r3, r4 = item['start_rotation']
                new_item['start_rotation'] = transform_rotation_z_90degrees([-r4, r1, r3, -r2])
                new_item['reference_path'] = [[x, -y, z] for x, z, y in item['reference_path']]
            new_scene_data.append(new_item)

        new_data[scan] = new_scene_data

    data = copy.deepcopy(new_data)
    new_data = defaultdict(list)

    # Keep only the first episode seen for each trajectory_id.
    if filter_same_trajectory:
        total_count = 0
        remaining_count = 0
        seen_trajectories = set()  # O(1) membership instead of the old O(n) list scan
        for scan, data_item in data.items():
            for item in data_item:
                total_count += 1
                if item['trajectory_id'] in seen_trajectories:
                    continue
                remaining_count += 1
                seen_trajectories.add(item['trajectory_id'])
                new_data[scan].append(item)
        log.info(f'[split:{split}]filter_same_trajectory remain: [ {remaining_count} / {total_count} ]')
        data = new_data
        new_data = defaultdict(list)

    # Drop episodes that climb stairs or change height along the path.
    if filter_stairs:
        total_count = 0
        remaining_count = 0
        for scan, data_item in data.items():
            for item in data_item:
                total_count += 1
                if has_stairs(item) or different_height(item):
                    continue
                remaining_count += 1
                new_data[scan].append(item)
        log.info(f'[split:{split}]filter_stairs remain: [ {remaining_count} / {total_count} ]')
        data = new_data

    return data
Original file line number Diff line number Diff line change
@@ -1,11 +1,43 @@
from internnav.configs.evaluator import EvalCfg
from internnav.evaluator.utils.common import load_kujiale_scene_usd, load_scene_usd
from internnav.projects.dataloader.resumable import ResumablePathKeyDataloader
import os

from internnav.configs.evaluator import TaskCfg
from internnav.utils.common_log_util import common_logger as log

def generate_episode(dataloader: ResumablePathKeyDataloader, config: EvalCfg):
scene_data_dir = config.task.scene.scene_data_dir
scene_asset_path = config.task.scene.scene_asset_path
from .resumable import ResumablePathKeyEpisodeloader


def load_scene_usd(mp3d_data_dir, scan):
    """Locate the fixed USD scene file for an MP3D *scan*.

    Walks ``<mp3d_data_dir>/<scan>`` looking for ``fixed.usd`` (or
    ``fixed_docker.usd`` when running inside a container) and returns the
    full path of the first match in ``os.walk`` order, or None when no
    such file exists (an error is logged in that case).
    """
    from internutopia.core.util import is_in_container

    # Loop-invariant: resolve the target filename once, not per directory.
    target_file_name = 'fixed_docker.usd' if is_in_container() else 'fixed.usd'
    for root, _dirs, files in os.walk(os.path.join(mp3d_data_dir, scan)):
        if target_file_name in files:
            return os.path.join(root, target_file_name)
    log.error('Scene USD not found for scan %s', scan)
    return None


def load_kujiale_scene_usd(kujiale_iros_data_dir, scan):
    """Return the path of ``<scan>/<scan>.usda`` under the kujiale data
    directory, or None (logging an error) when the file is missing."""
    candidate = os.path.join(kujiale_iros_data_dir, scan, f'{scan}.usda')
    if os.path.exists(candidate):
        return candidate
    log.error('Scene USD not found for scan %s', scan)
    return None


def generate_vln_episode(dataloader: ResumablePathKeyEpisodeloader, task: TaskCfg):
scene_data_dir = task.scene.scene_data_dir
scene_asset_path = task.scene.scene_asset_path
eval_path_key_list = dataloader.resumed_path_key_list
path_key_data = dataloader.path_key_data
episodes = []
Expand All @@ -21,9 +53,9 @@ def generate_episode(dataloader: ResumablePathKeyDataloader, config: EvalCfg):
from internnav.env.utils.internutopia_extension.configs.tasks import VLNEvalTaskCfg

robot = H1RobotCfg(
**config.task.robot.robot_settings,
controllers=[ControllerCfg(**cfg.controller_settings) for cfg in config.task.robot.controllers],
sensors=[RepCameraCfg(**cfg.sensor_settings) for cfg in config.task.robot.sensors],
**task.robot.robot_settings,
controllers=[ControllerCfg(**cfg.controller_settings) for cfg in task.robot.controllers],
sensors=[RepCameraCfg(**cfg.sensor_settings) for cfg in task.robot.sensors],
)

for path_key in eval_path_key_list:
Expand All @@ -33,23 +65,23 @@ def generate_episode(dataloader: ResumablePathKeyDataloader, config: EvalCfg):
data['path_key'] = path_key
data['name'] = dataloader.task_name

if config.task.scene.scene_type == 'kujiale':
if task.scene.scene_type == 'kujiale':
load_scene_func = load_kujiale_scene_usd
scene_scale = (1, 1, 1)
else:
load_scene_func = load_scene_usd
scene_scale = (1, 1, 1)

robot_flash = getattr(config.task, "robot_flash", False)
one_step_stand_still = getattr(config.task, "one_step_stand_still", False)
if config.task.metric.metric_setting['metric_config'].get('name', None) is None:
config.task.metric.metric_setting['metric_config']['name'] = 'default_eval_name'
robot_flash = getattr(task, "robot_flash", False)
one_step_stand_still = getattr(task, "one_step_stand_still", False)
if task.metric.metric_setting['metric_config'].get('name', None) is None:
task.metric.metric_setting['metric_config']['name'] = 'default_eval_name'
episodes.append(
VLNEvalTaskCfg(
**config.task.task_settings,
**task.task_settings,
robot_flash=robot_flash,
one_step_stand_still=one_step_stand_still,
metrics=[VLNPEMetricCfg(**config.task.metric.metric_setting['metric_config'])],
metrics=[VLNPEMetricCfg(**task.metric.metric_setting['metric_config'])],
scene_asset_path=load_scene_func(scene_data_dir, dataloader.path_key_scan[path_key])
if scene_asset_path == ''
else scene_asset_path,
Expand Down
Loading