#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact george.drettakis@inria.fr
#

import math
import os
from typing import NamedTuple

import cv2
import numpy as np
import torch

class BasicPointCloud(NamedTuple):
    points: np.ndarray
    colors: np.ndarray
    normals: np.ndarray
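
# Illustrative example (not part of the original file): constructing a
# BasicPointCloud, showing the expected (N, 3) array shapes.
def _example_basic_point_cloud():
    pts = np.random.rand(100, 3)
    return BasicPointCloud(points=pts,
                           colors=np.ones_like(pts),
                           normals=np.zeros_like(pts))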

def geom_transform_points(points, transf_matrix):
    # Transform (P, 3) points by a 4x4 matrix in homogeneous coordinates
    # (row-vector convention), guarding the perspective divide with an epsilon.
    P, _ = points.shape
    ones = torch.ones(P, 1, dtype=points.dtype, device=points.device)
    points_hom = torch.cat([points, ones], dim=1)
    points_out = torch.matmul(points_hom, transf_matrix.unsqueeze(0))
    denom = points_out[..., 3:] + 1e-7
    return (points_out[..., :3] / denom).squeeze(dim=0)
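
# Usage sketch (illustrative, not from the original file): an identity
# transform should return the input points, up to the 1e-7 epsilon above.
def _example_geom_transform_points():
    pts = torch.rand(8, 3)
    out = geom_transform_points(pts, torch.eye(4, dtype=pts.dtype))
    assert torch.allclose(out, pts, atol=1e-5)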

def getWorld2View(R, t):
    # Build a 4x4 world-to-camera matrix from a c2w rotation R and w2c translation t.
    Rt = np.zeros((4, 4))
    Rt[:3, :3] = R.transpose()
    Rt[:3, 3] = t
    Rt[3, 3] = 1.0
    return np.float32(Rt)

def getWorld2View2(R, t, translate=np.array([.0, .0, .0]), scale=1.0):
    """Get the world-to-camera matrix, optionally recentering and rescaling the camera center.

    Args:
        R (np.ndarray): camera-to-world (c2w) rotation, shape (3, 3).
        t (np.ndarray): world-to-camera (w2c) translation, shape (3,).
        translate (np.ndarray, optional): offset added to the camera center. Defaults to np.array([.0, .0, .0]).
        scale (float, optional): scale applied to the camera center. Defaults to 1.0.

    Returns:
        np.ndarray: the 4x4 world-to-camera matrix, as float32.
    """
    # compose the w2c matrix
    Rt = np.zeros((4, 4))
    Rt[:3, :3] = R.transpose()
    Rt[:3, 3] = t
    Rt[3, 3] = 1.0
    # invert to get the c2w matrix
    C2W = np.linalg.inv(Rt)
    # shift and scale the camera center
    cam_center = C2W[:3, 3]
    cam_center = (cam_center + translate) * scale
    C2W[:3, 3] = cam_center
    # invert back to get the final w2c matrix
    Rt = np.linalg.inv(C2W)
    return np.float32(Rt)
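
# Consistency sketch (illustrative): with the default translate/scale,
# getWorld2View2 should reduce to getWorld2View up to inversion round-off.
def _example_world2view_consistency():
    R = np.eye(3)
    t = np.array([0.0, 0.0, 1.0])
    assert np.allclose(getWorld2View(R, t), getWorld2View2(R, t), atol=1e-6)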

def getProjectionMatrix(znear, zfar, fovX, fovY):
    # Build a perspective projection matrix that maps camera-space depth in
    # [znear, zfar] to NDC depth in [0, 1] after the perspective divide.
    tanHalfFovY = math.tan(fovY / 2)
    tanHalfFovX = math.tan(fovX / 2)

    top = tanHalfFovY * znear
    bottom = -top
    right = tanHalfFovX * znear
    left = -right

    P = torch.zeros(4, 4)
    z_sign = 1.0

    P[0, 0] = 2.0 * znear / (right - left)
    P[1, 1] = 2.0 * znear / (top - bottom)
    P[0, 2] = (right + left) / (right - left)
    P[1, 2] = (top + bottom) / (top - bottom)
    P[3, 2] = z_sign
    P[2, 2] = z_sign * zfar / (zfar - znear)
    P[2, 3] = -(zfar * znear) / (zfar - znear)
    return P
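
# Worked check (illustrative): after the perspective divide, the matrix maps
# camera-space depth znear to NDC depth 0 and zfar to NDC depth 1.
def _example_projection_depth_range():
    P = getProjectionMatrix(znear=0.01, zfar=100.0, fovX=math.pi / 2, fovY=math.pi / 2)
    near_pt = P @ torch.tensor([0.0, 0.0, 0.01, 1.0])
    far_pt = P @ torch.tensor([0.0, 0.0, 100.0, 1.0])
    assert abs(near_pt[2] / near_pt[3]) < 1e-4        # znear -> 0
    assert abs(far_pt[2] / far_pt[3] - 1.0) < 1e-4    # zfar  -> 1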

def fov2focal(fov, pixels):
    return pixels / (2 * math.tan(fov / 2))

def focal2fov(focal, pixels):
    return 2 * math.atan(pixels / (2 * focal))
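
# Round-trip sketch (illustrative): focal2fov inverts fov2focal for a fixed
# image size in pixels.
def _example_fov_focal_roundtrip():
    fov = math.radians(60.0)
    focal = fov2focal(fov, 800)
    assert abs(focal2fov(focal, 800) - fov) < 1e-9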

# The following functions depths_double_to_points and depth_double_to_normal are
# adapted from https://github.com/hugoycj/2dgs-gaustudio/blob/main/utils/graphics_utils.py
def depths_double_to_points(view, depthmap1, depthmap2):
    # Back-project two depth maps into camera-space point maps using pinhole
    # intrinsics derived from the view's field of view.
    W, H = view.image_width, view.image_height
    fx = W / (2 * math.tan(view.FoVx / 2.))
    fy = H / (2 * math.tan(view.FoVy / 2.))
    intrins_inv = torch.tensor(
        [[1/fx, 0., -W/(2 * fx)],
         [0., 1/fy, -H/(2 * fy)],
         [0., 0., 1.0]]
    ).float().cuda()
    grid_x, grid_y = torch.meshgrid(torch.arange(W)+0.5, torch.arange(H)+0.5, indexing='xy')
    points = torch.stack([grid_x, grid_y, torch.ones_like(grid_x)], dim=0).reshape(3, -1).float().cuda()
    rays_d = intrins_inv @ points
    points1 = depthmap1.reshape(1, -1) * rays_d
    points2 = depthmap2.reshape(1, -1) * rays_d
    return points1.reshape(3, H, W), points2.reshape(3, H, W)

def point_double_to_normal(view, points1, points2):
    # Estimate per-pixel normals from central differences of the point maps;
    # a one-pixel border is left as zeros where the differences are undefined.
    points = torch.stack([points1, points2], dim=0)
    output = torch.zeros_like(points)
    dx = points[..., 2:, 1:-1] - points[..., :-2, 1:-1]
    dy = points[..., 1:-1, 2:] - points[..., 1:-1, :-2]
    normal_map = torch.nn.functional.normalize(torch.cross(dx, dy, dim=1), dim=1)
    output[..., 1:-1, 1:-1] = normal_map
    return output

def depth_double_to_normal(view, depth1, depth2):
    points1, points2 = depths_double_to_points(view, depth1, depth2)
    return point_double_to_normal(view, points1, points2)
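
# Usage sketch (illustrative; requires a CUDA device, since the functions above
# call .cuda()). The view only needs image_width/image_height and FoVx/FoVy
# attributes, so a SimpleNamespace stands in for a real camera object here.
def _example_depth_double_to_normal():
    from types import SimpleNamespace
    view = SimpleNamespace(image_width=64, image_height=48,
                           FoVx=math.pi / 2, FoVy=math.pi / 3)
    depth = torch.ones(1, 48, 64, device="cuda")
    return depth_double_to_normal(view, depth, depth)  # shape (2, 3, 48, 64)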

def bilinear_sampler(img, coords, mask=False):
    """ Wrapper for grid_sample, uses pixel coordinates """
    H, W = img.shape[-2:]
    # normalize pixel coordinates to [-1, 1] as expected by grid_sample
    xgrid, ygrid = coords.split([1, 1], dim=-1)
    xgrid = 2 * xgrid / (W - 1) - 1
    ygrid = 2 * ygrid / (H - 1) - 1

    grid = torch.cat([xgrid, ygrid], dim=-1)
    img = torch.nn.functional.grid_sample(img, grid, align_corners=True)

    if mask:
        mask = (xgrid > -1) & (ygrid > -1) & (xgrid < 1) & (ygrid < 1)
        return img, mask.float()
    return img
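
# Usage sketch (illustrative): sampling a (N, C, H, W) image at pixel
# coordinates of shape (N, H_out, W_out, 2); the optional mask flags
# coordinates that fall inside the image bounds.
def _example_bilinear_sampler():
    img = torch.rand(1, 3, 32, 32)
    coords = torch.tensor([[[[15.5, 7.25], [40.0, 7.25]]]])  # second point is out of bounds
    sampled, valid = bilinear_sampler(img, coords, mask=True)
    return sampled, valid  # valid is 1. for the first coordinate, 0. for the second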

# project the reference point cloud into the source view, then project back
# extrinsics here are world-to-camera (w2c) matrices
def reproject_with_depth(depth_ref, intrinsics_ref, extrinsics_ref, depth_src, intrinsics_src, extrinsics_src):
    width, height = depth_ref.shape[1], depth_ref.shape[0]
    ## step 1. project reference pixels to the source view
    # reference view x, y
    x_ref, y_ref = np.meshgrid(np.arange(0, width), np.arange(0, height))
    x_ref, y_ref = x_ref.reshape([-1]), y_ref.reshape([-1])
    # reference 3D space
    xyz_ref = np.matmul(np.linalg.inv(intrinsics_ref),
                        np.vstack((x_ref, y_ref, np.ones_like(x_ref))) * depth_ref.reshape([-1]))
    # source 3D space
    xyz_src = np.matmul(np.matmul(extrinsics_src, np.linalg.inv(extrinsics_ref)),
                        np.vstack((xyz_ref, np.ones_like(x_ref))))[:3]
    # source view x, y
    K_xyz_src = np.matmul(intrinsics_src, xyz_src)
    xy_src = K_xyz_src[:2] / K_xyz_src[2:3]

    ## step 2. reproject the source view points with the source view depth estimation
    # find the depth estimation of the source view
    x_src = xy_src[0].reshape([height, width]).astype(np.float32)
    y_src = xy_src[1].reshape([height, width]).astype(np.float32)
    sampled_depth_src = cv2.remap(depth_src, x_src, y_src, interpolation=cv2.INTER_LINEAR)
    # mask = sampled_depth_src > 0

    # source 3D space
    # NOTE that we should use the sampled source-view depth here to project back
    xyz_src = np.matmul(np.linalg.inv(intrinsics_src),
                        np.vstack((xy_src, np.ones_like(x_ref))) * sampled_depth_src.reshape([-1]))
    # reference 3D space
    xyz_reprojected = np.matmul(np.matmul(extrinsics_ref, np.linalg.inv(extrinsics_src)),
                                np.vstack((xyz_src, np.ones_like(x_ref))))[:3]
    # reference view x, y, depth
    depth_reprojected = xyz_reprojected[2].reshape([height, width]).astype(np.float32)
    K_xyz_reprojected = np.matmul(intrinsics_ref, xyz_reprojected)
    xy_reprojected = K_xyz_reprojected[:2] / K_xyz_reprojected[2:3]
    x_reprojected = xy_reprojected[0].reshape([height, width]).astype(np.float32)
    y_reprojected = xy_reprojected[1].reshape([height, width]).astype(np.float32)
    return depth_reprojected, x_reprojected, y_reprojected, x_src, y_src

def check_geometric_consistency(depth_ref, intrinsics_ref, extrinsics_ref, depth_src, intrinsics_src, extrinsics_src, thre1=0.5, thre2=0.01):
    width, height = depth_ref.shape[1], depth_ref.shape[0]
    x_ref, y_ref = np.meshgrid(np.arange(0, width), np.arange(0, height))
    depth_reprojected, x2d_reprojected, y2d_reprojected, x2d_src, y2d_src = reproject_with_depth(depth_ref, intrinsics_ref, extrinsics_ref,
                                                                                                 depth_src, intrinsics_src, extrinsics_src)
    # check the reprojection error in pixels: |p_reproj - p_ref| < thre1
    dist = np.sqrt((x2d_reprojected - x_ref) ** 2 + (y2d_reprojected - y_ref) ** 2)
    # check the relative depth error: |d_reproj - d_ref| / d_ref < thre2
    depth_diff = np.abs(depth_reprojected - depth_ref)
    relative_depth_diff = depth_diff / depth_ref

    mask = np.logical_and(dist < thre1, relative_depth_diff < thre2)
    depth_reprojected[~mask] = 0
    return mask, depth_reprojected, x2d_src, y2d_src, relative_depth_diff
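
# Usage sketch (illustrative): with identical cameras and identical depth maps,
# reprojection is the identity, so every pixel should pass both checks.
def _example_check_geometric_consistency():
    H, W = 24, 32
    K = np.array([[100.0, 0.0, W / 2],
                  [0.0, 100.0, H / 2],
                  [0.0, 0.0, 1.0]])
    E = np.eye(4)  # w2c extrinsics
    depth = np.full((H, W), 2.0, dtype=np.float32)
    mask, _, _, _, _ = check_geometric_consistency(depth, K, E, depth, K, E)
    assert mask.all()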