Skip to content

Commit 0bc6fe3

Browse files
committed
add colmap(gaustudio) dataloader
1 parent c7d0c43 commit 0bc6fe3

File tree

5 files changed

+323
-0
lines changed

5 files changed

+323
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
__pycache__
22
*.ply
33

4+
result*
45
.idea*

colmap_with_depth.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import argparse
2+
import os
3+
import time
4+
import numpy as np
5+
import cv2
6+
7+
import fusion
8+
from data_loader import readGaustudioColmap, read_depth_meter
9+
10+
11+
def tsdf_fusion(input_path, output_path):
12+
# ======================================================================================================== #
13+
# load colmap data from gaustudio
14+
# ======================================================================================================== #
15+
cam_infos, cam_intr = readGaustudioColmap(input_path)
16+
# ======================================================================================================== #
17+
18+
# ======================================================================================================== #
19+
# (Optional) This is an example of how to compute the 3D bounds
20+
# in world coordinates of the convex hull of all camera view
21+
# frustums in the dataset
22+
# ======================================================================================================== #
23+
print("Estimating voxel volume bounds...")
24+
vol_bnds = np.zeros((3, 2))
25+
for cam_info in cam_infos:
26+
depth_im = read_depth_meter(cam_info.depth_path)
27+
28+
T = cam_info.T.reshape(3, 1)
29+
cam_pose = np.eye(4)
30+
cam_pose[:3, :3] = cam_info.R
31+
cam_pose[:3, 3] = T.squeeze()
32+
33+
# Compute camera view frustum and extend convex hull
34+
view_frust_pts = fusion.get_view_frustum(depth_im, cam_intr, cam_pose)
35+
vol_bnds[:, 0] = np.minimum(vol_bnds[:, 0], np.amin(view_frust_pts, axis=1))
36+
vol_bnds[:, 1] = np.maximum(vol_bnds[:, 1], np.amax(view_frust_pts, axis=1))
37+
# ======================================================================================================== #
38+
39+
# ======================================================================================================== #
40+
# Integrate
41+
# ======================================================================================================== #
42+
# Initialize voxel volume
43+
print("Initializing voxel volume...")
44+
tsdf_vol = fusion.TSDFVolume(vol_bnds, voxel_size=0.02)
45+
print("Initialize Success!")
46+
47+
# Loop through RGB-D images and fuse them together
48+
t0_elapse = time.time()
49+
i = 0
50+
n_imgs = len(cam_infos)
51+
for cam_info in cam_infos:
52+
print("Fusing frame %d/%d" % (i + 1, n_imgs))
53+
i += 1
54+
55+
# Read RGB-D image and camera pose
56+
color_image = cv2.cvtColor(cv2.imread(cam_info.image_path), cv2.COLOR_BGR2RGB)
57+
depth_im = read_depth_meter(cam_info.depth_path)
58+
59+
T = cam_info.T.reshape(3, 1)
60+
cam_pose = np.eye(4)
61+
cam_pose[:3, :3] = cam_info.R
62+
cam_pose[:3, 3] = T.squeeze()
63+
64+
# Integrate observation into voxel volume (assume color aligned with depth)
65+
tsdf_vol.integrate(color_image, depth_im, cam_intr, cam_pose, obs_weight=1.)
66+
# ======================================================================================================== #
67+
68+
# ======================================================================================================== #
69+
# Save result
70+
# ======================================================================================================== #
71+
if not os.path.exists(output_path):
72+
os.makedirs(output_path)
73+
74+
fps = n_imgs / (time.time() - t0_elapse)
75+
print("Average FPS: {:.2f}".format(fps))
76+
77+
# Get mesh from voxel volume and save to disk (can be viewed with Meshlab)
78+
print("Saving mesh to mesh.ply and trimesh.ply...")
79+
verts, faces, norms, colors = tsdf_vol.get_mesh()
80+
fusion.meshwrite(os.path.join(output_path, "mesh.ply"), verts, faces, norms, colors)
81+
fusion.meshwrite_trimesh(os.path.join(output_path, "trimesh.ply"), verts, faces)
82+
83+
# Get point cloud from voxel volume and save to disk (can be viewed with Meshlab)
84+
print("Saving point cloud to pc.ply...")
85+
point_cloud = tsdf_vol.get_point_cloud()
86+
fusion.pcwrite(os.path.join(output_path, "pc.ply"), point_cloud)
87+
# ======================================================================================================== #
88+
89+
90+
if __name__ == "__main__":
91+
parser = argparse.ArgumentParser()
92+
parser.add_argument('--input_path', required=True, help='Path to input data')
93+
parser.add_argument('--output_path', required=True, help='Path to output result')
94+
args = parser.parse_args()
95+
96+
tsdf_fusion(args.input_path, args.output_path)

data_loader.py

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
import os
2+
import sys
3+
import collections
4+
import struct
5+
import numpy as np
6+
import cv2
7+
8+
from PIL import Image as PILImage
9+
10+
CameraModel = collections.namedtuple(
11+
"CameraModel", ["model_id", "model_name", "num_params"])
12+
Camera = collections.namedtuple(
13+
"Camera", ["id", "model", "width", "height", "params"])
14+
Image = collections.namedtuple(
15+
"Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"])
16+
CameraInfo = collections.namedtuple(
17+
"CameraInfo", ["uid", "R", "T", "image_path", "depth_path", "image_name"]
18+
)
19+
CAMERA_MODELS = {
20+
CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3),
21+
CameraModel(model_id=1, model_name="PINHOLE", num_params=4),
22+
CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4),
23+
CameraModel(model_id=3, model_name="RADIAL", num_params=5),
24+
CameraModel(model_id=4, model_name="OPENCV", num_params=8),
25+
CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8),
26+
CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12),
27+
CameraModel(model_id=7, model_name="FOV", num_params=5),
28+
CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4),
29+
CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5),
30+
CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12)
31+
}
32+
CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model)
33+
for camera_model in CAMERA_MODELS])
34+
35+
36+
def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
37+
"""Read and unpack the next bytes from a binary file.
38+
:param fid:
39+
:param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc.
40+
:param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
41+
:param endian_character: Any of {@, =, <, >, !}
42+
:return: Tuple of read and unpacked values.
43+
"""
44+
data = fid.read(num_bytes)
45+
return struct.unpack(endian_character + format_char_sequence, data)
46+
47+
48+
def read_extrinsics_binary(path_to_model_file):
49+
"""
50+
see: src/base/reconstruction.cc
51+
void Reconstruction::ReadImagesBinary(const std::string& path)
52+
void Reconstruction::WriteImagesBinary(const std::string& path)
53+
"""
54+
images = {}
55+
with open(path_to_model_file, "rb") as fid:
56+
num_reg_images = read_next_bytes(fid, 8, "Q")[0]
57+
for _ in range(num_reg_images):
58+
binary_image_properties = read_next_bytes(
59+
fid, num_bytes=64, format_char_sequence="idddddddi")
60+
image_id = binary_image_properties[0]
61+
qvec = np.array(binary_image_properties[1:5])
62+
tvec = np.array(binary_image_properties[5:8])
63+
camera_id = binary_image_properties[8]
64+
image_name = ""
65+
current_char = read_next_bytes(fid, 1, "c")[0]
66+
while current_char != b"\x00": # look for the ASCII 0 entry
67+
image_name += current_char.decode("utf-8")
68+
current_char = read_next_bytes(fid, 1, "c")[0]
69+
num_points2D = read_next_bytes(fid, num_bytes=8,
70+
format_char_sequence="Q")[0]
71+
x_y_id_s = read_next_bytes(fid, num_bytes=24 * num_points2D,
72+
format_char_sequence="ddq" * num_points2D)
73+
xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])),
74+
tuple(map(float, x_y_id_s[1::3]))])
75+
point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
76+
images[image_id] = Image(
77+
id=image_id, qvec=qvec, tvec=tvec,
78+
camera_id=camera_id, name=image_name,
79+
xys=xys, point3D_ids=point3D_ids)
80+
return images
81+
82+
83+
def read_intrinsics_binary(path_to_model_file):
84+
"""
85+
see: src/base/reconstruction.cc
86+
void Reconstruction::WriteCamerasBinary(const std::string& path)
87+
void Reconstruction::ReadCamerasBinary(const std::string& path)
88+
"""
89+
cameras = {}
90+
with open(path_to_model_file, "rb") as fid:
91+
num_cameras = read_next_bytes(fid, 8, "Q")[0]
92+
for _ in range(num_cameras):
93+
camera_properties = read_next_bytes(
94+
fid, num_bytes=24, format_char_sequence="iiQQ")
95+
camera_id = camera_properties[0]
96+
model_id = camera_properties[1]
97+
model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name
98+
width = camera_properties[2]
99+
height = camera_properties[3]
100+
num_params = CAMERA_MODEL_IDS[model_id].num_params
101+
params = read_next_bytes(fid, num_bytes=8 * num_params,
102+
format_char_sequence="d" * num_params)
103+
cameras[camera_id] = Camera(id=camera_id,
104+
model=model_name,
105+
width=width,
106+
height=height,
107+
params=np.array(params))
108+
assert len(cameras) == num_cameras
109+
return cameras
110+
111+
112+
def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
113+
def qvec2rotmat(qvec):
114+
return np.array([
115+
[1 - 2 * qvec[2] ** 2 - 2 * qvec[3] ** 2,
116+
2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
117+
2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]],
118+
[2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
119+
1 - 2 * qvec[1] ** 2 - 2 * qvec[3] ** 2,
120+
2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]],
121+
[2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
122+
2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
123+
1 - 2 * qvec[1] ** 2 - 2 * qvec[2] ** 2]])
124+
125+
cam_infos = []
126+
for idx, key in enumerate(cam_extrinsics):
127+
sys.stdout.write('\r')
128+
sys.stdout.write("Reading camera {}/{}".format(idx + 1, len(cam_extrinsics)))
129+
sys.stdout.flush()
130+
131+
extr = cam_extrinsics[key]
132+
intr = cam_intrinsics[extr.camera_id]
133+
134+
uid = intr.id
135+
136+
# 需要转换到TSDF Fusion需要的格式
137+
R = np.transpose(qvec2rotmat(extr.qvec))
138+
T = np.array(extr.tvec)
139+
T = -R.dot(T)
140+
141+
image_path = os.path.join(images_folder, os.path.basename(extr.name))
142+
depth_path = image_path.replace("/images/", "/depths/").replace(".jpg", ".png")
143+
image_name = os.path.splitext(os.path.basename(image_path))[0]
144+
145+
cam_info = CameraInfo(uid=uid, R=R, T=T, image_path=image_path, depth_path=depth_path, image_name=image_name)
146+
cam_infos.append(cam_info)
147+
sys.stdout.write('\n')
148+
return cam_infos
149+
150+
151+
def readGaustudioColmap(path):
152+
cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.bin")
153+
cameras_intrinsic_file = os.path.join(path, "sparse/0", "cameras.bin")
154+
cam_extrinsics = read_extrinsics_binary(cameras_extrinsic_file)
155+
cam_intrinsics = read_intrinsics_binary(cameras_intrinsic_file)
156+
157+
images_dir = os.path.join(path, "images")
158+
cam_infos_unsorted = readColmapCameras(cam_extrinsics=cam_extrinsics,
159+
cam_intrinsics=cam_intrinsics,
160+
images_folder=images_dir)
161+
162+
cam_infos = sorted(cam_infos_unsorted.copy(), key=lambda x: x.image_name)
163+
164+
intrinsic_path = os.path.join(path, "intrinsic.txt")
165+
if os.path.exists(intrinsic_path):
166+
with open(intrinsic_path, 'r') as f:
167+
intrinsic_matrix = np.array([list(map(float, line.split())) for line in f.readlines()])
168+
else:
169+
fx_sum = 0
170+
fy_sum = 0
171+
cx_sum = 0
172+
cy_sum = 0
173+
count = 0
174+
175+
for id, camera in cam_intrinsics.items():
176+
fx, fy, cx, cy = camera.params
177+
fx_sum += fx
178+
fy_sum += fy
179+
cx_sum += cx
180+
cy_sum += cy
181+
count += 1
182+
183+
avg_fx = fx_sum / count
184+
avg_fy = fy_sum / count
185+
avg_cx = cx_sum / count
186+
avg_cy = cy_sum / count
187+
188+
intrinsic_matrix = np.array([
189+
[avg_fx, 0, avg_cx],
190+
[0, avg_fy, avg_cy],
191+
[0, 0, 1]
192+
])
193+
194+
with open(intrinsic_path, 'w') as f:
195+
for row in intrinsic_matrix:
196+
f.write(" ".join(map(str, row)) + "\n")
197+
198+
return cam_infos, intrinsic_matrix
199+
200+
201+
def read_depth_meter(depth_path):
202+
# Read depth_max from image
203+
depth_img = PILImage.open(depth_path)
204+
png_info = depth_img.info
205+
if "depth_max" in png_info:
206+
depth_max = float(png_info["depth_max"])
207+
else:
208+
raise ValueError("depth_max not found in the PNG metadata!")
209+
210+
# Read depth image
211+
depth_im = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED).astype(float)
212+
depth_im = (depth_im / 65535.0) * depth_max
213+
depth_im[depth_im >= 8.0] = 0 # set invalid depth to 0 (specific to iphone dataset)
214+
215+
return depth_im

fusion.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Copyright (c) 2018 Andy Zeng
22

33
import numpy as np
4+
import trimesh
45

56
from numba import njit, prange
67
from skimage import measure
@@ -429,3 +430,12 @@ def pcwrite(filename, xyzrgb):
429430
xyz[i, 0], xyz[i, 1], xyz[i, 2],
430431
rgb[i, 0], rgb[i, 1], rgb[i, 2],
431432
))
433+
434+
435+
def meshwrite_trimesh(filename, verts, faces):
436+
"""Save a 3D mesh to a polygon .ply file using trimesh.
437+
"""
438+
# 创建 trimesh 对象
439+
mesh = trimesh.Trimesh(vertices=verts, faces=faces)
440+
# 保存为 .ply 文件(trimesh 会自动处理格式)
441+
mesh.export(filename, file_type='ply')

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ numpy==1.21.6
22
opencv-python
33
scikit-image==0.18.3
44
numba==0.56.4
5+
trimesh
56

67
# Optional
78
pycuda

0 commit comments

Comments
 (0)