#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact george.drettakis@inria.fr
#

import torch
import math
import numpy as np
from typing import NamedTuple
import cv2


class BasicPointCloud(NamedTuple):
    points: np.ndarray
    colors: np.ndarray
    normals: np.ndarray


def geom_transform_points(points, transf_matrix):
    """Apply a 4x4 transform to a (P, 3) point set via homogeneous coordinates."""
    P, _ = points.shape
    ones = torch.ones(P, 1, dtype=points.dtype, device=points.device)
    points_hom = torch.cat([points, ones], dim=1)
    points_out = torch.matmul(points_hom, transf_matrix.unsqueeze(0))

    # small epsilon guards against division by zero in the perspective divide
    denom = points_out[..., 3:] + 0.0000001
    return (points_out[..., :3] / denom).squeeze(dim=0)


def getWorld2View(R, t):
    """Build a 4x4 world-to-camera matrix from a c2w rotation R and w2c translation t."""
    Rt = np.zeros((4, 4))
    Rt[:3, :3] = R.transpose()
    Rt[:3, 3] = t
    Rt[3, 3] = 1.0
    return np.float32(Rt)


def getWorld2View2(R, t, translate=np.array([.0, .0, .0]), scale=1.0):
    """Build a world-to-camera matrix, optionally recentering and rescaling the camera.

    Args:
        R (np.ndarray): 3x3 camera-to-world rotation.
        t (np.ndarray): world-to-camera translation.
        translate (np.ndarray, optional): offset added to the camera center. Defaults to zeros.
        scale (float, optional): uniform scale applied to the camera center. Defaults to 1.0.

    Returns:
        np.ndarray: 4x4 world-to-camera matrix as float32.
    """
    # compose the w2c matrix
    Rt = np.zeros((4, 4))
    Rt[:3, :3] = R.transpose()
    Rt[:3, 3] = t
    Rt[3, 3] = 1.0

    # invert to c2w, adjust the camera center, then invert back to w2c
    C2W = np.linalg.inv(Rt)
    cam_center = C2W[:3, 3]
    cam_center = (cam_center + translate) * scale
    C2W[:3, 3] = cam_center
    Rt = np.linalg.inv(C2W)
    return np.float32(Rt)


def getProjectionMatrix(znear, zfar, fovX, fovY):
    """Build an OpenGL-style perspective projection matrix from the view frustum."""
    tanHalfFovY = math.tan(fovY / 2)
    tanHalfFovX = math.tan(fovX / 2)

    top = tanHalfFovY * znear
    bottom = -top
    right = tanHalfFovX * znear
    left = -right

    P = torch.zeros(4, 4)

    z_sign = 1.0

    P[0, 0] = 2.0 * znear / (right - left)
    P[1, 1] = 2.0 * znear / (top - bottom)
    P[0, 2] = (right + left) / (right - left)
    P[1, 2] = (top + bottom) / (top - bottom)
    P[3, 2] = z_sign
    P[2, 2] = z_sign * zfar / (zfar - znear)
    P[2, 3] = -(zfar * znear) / (zfar - znear)
    return P


def fov2focal(fov, pixels):
    """Convert a field of view (radians) to a focal length in pixels."""
    return pixels / (2 * math.tan(fov / 2))


def focal2fov(focal, pixels):
    """Convert a focal length in pixels to a field of view in radians."""
    return 2 * math.atan(pixels / (2 * focal))


# depths_double_to_points and depth_double_to_normal are adapted from
# https://github.com/hugoycj/2dgs-gaustudio/blob/main/utils/graphics_utils.py
def depths_double_to_points(view, depthmap1, depthmap2):
    """Back-project two depth maps of the same view into camera-space point maps."""
    W, H = view.image_width, view.image_height
    fx = W / (2 * math.tan(view.FoVx / 2.))
    fy = H / (2 * math.tan(view.FoVy / 2.))
    # inverse intrinsics: pixel coordinates -> camera-space ray directions
    intrins_inv = torch.tensor(
        [[1/fx, 0., -W/(2 * fx)],
         [0., 1/fy, -H/(2 * fy)],
         [0., 0., 1.0]]
    ).float().cuda()
    # cast rays through pixel centers (hence the +0.5)
    grid_x, grid_y = torch.meshgrid(torch.arange(W)+0.5, torch.arange(H)+0.5, indexing='xy')
    points = torch.stack([grid_x, grid_y, torch.ones_like(grid_x)], dim=0).reshape(3, -1).float().cuda()
    rays_d = intrins_inv @ points
    points1 = depthmap1.reshape(1, -1) * rays_d
    points2 = depthmap2.reshape(1, -1) * rays_d
    return points1.reshape(3, H, W), points2.reshape(3, H, W)


def point_double_to_normal(view, points1, points2):
    """Estimate normal maps from two point maps via finite-difference cross products."""
    points = torch.stack([points1, points2], dim=0)
    output = torch.zeros_like(points)
    # central differences along the two image axes
    dx = points[..., 2:, 1:-1] - points[..., :-2, 1:-1]
    dy = points[..., 1:-1, 2:] - points[..., 1:-1, :-2]
    normal_map = torch.nn.functional.normalize(torch.cross(dx, dy, dim=1), dim=1)
    # border pixels lack neighbors on both sides and stay zero
    output[..., 1:-1, 1:-1] = normal_map
    return output


def depth_double_to_normal(view, depth1, depth2):
    """Convert two depth maps of the same view into normal maps."""
    points1, points2 = depths_double_to_points(view, depth1, depth2)
    return point_double_to_normal(view, points1, points2)
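
# A minimal usage sketch for the depth-to-normal helpers above (illustrative
# only): the SimpleNamespace stand-in for a camera object and the constant
# depth map are assumptions, not part of this codebase's API. Any object
# exposing image_width, image_height, FoVx and FoVy works as `view`.
def _demo_depth_double_to_normal():
    from types import SimpleNamespace
    view = SimpleNamespace(image_width=64, image_height=48,
                           FoVx=math.radians(60), FoVy=math.radians(45))
    # fronto-parallel plane 2 units away; interior normals come out along +-z
    depth = torch.full((48, 64), 2.0, device='cuda')
    normals = depth_double_to_normal(view, depth, depth)  # shape (2, 3, H, W)
    return normals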
def bilinear_sampler(img, coords, mask=False):
    """Wrapper for grid_sample, uses pixel coordinates."""
    H, W = img.shape[-2:]
    xgrid, ygrid = coords.split([1, 1], dim=-1)
    # normalize pixel coordinates to [-1, 1] as grid_sample expects
    xgrid = 2 * xgrid / (W - 1) - 1
    ygrid = 2 * ygrid / (H - 1) - 1

    grid = torch.cat([xgrid, ygrid], dim=-1)
    img = torch.nn.functional.grid_sample(img, grid, align_corners=True)

    if mask:
        # mark samples that fall inside the image bounds
        mask = (xgrid > -1) & (ygrid > -1) & (xgrid < 1) & (ygrid < 1)
        return img, mask.float()

    return img


# project the reference point cloud into the source view, then project back;
# the transform chain (extrinsics_src @ inv(extrinsics_ref)) assumes the
# extrinsics are world-to-camera matrices
def reproject_with_depth(depth_ref, intrinsics_ref, extrinsics_ref, depth_src, intrinsics_src, extrinsics_src):
    width, height = depth_ref.shape[1], depth_ref.shape[0]
    ## step 1. project reference pixels to the source view
    # reference view x, y
    x_ref, y_ref = np.meshgrid(np.arange(0, width), np.arange(0, height))
    x_ref, y_ref = x_ref.reshape([-1]), y_ref.reshape([-1])
    # reference 3D space
    xyz_ref = np.matmul(np.linalg.inv(intrinsics_ref),
                        np.vstack((x_ref, y_ref, np.ones_like(x_ref))) * depth_ref.reshape([-1]))
    # source 3D space
    xyz_src = np.matmul(np.matmul(extrinsics_src, np.linalg.inv(extrinsics_ref)),
                        np.vstack((xyz_ref, np.ones_like(x_ref))))[:3]
    # source view x, y
    K_xyz_src = np.matmul(intrinsics_src, xyz_src)
    xy_src = K_xyz_src[:2] / K_xyz_src[2:3]

    ## step 2. reproject the source view points with the source view depth estimate
    # look up the source-view depth at the projected locations
    x_src = xy_src[0].reshape([height, width]).astype(np.float32)
    y_src = xy_src[1].reshape([height, width]).astype(np.float32)
    sampled_depth_src = cv2.remap(depth_src, x_src, y_src, interpolation=cv2.INTER_LINEAR)
    # mask = sampled_depth_src > 0

    # source 3D space
    # NOTE that we should use the sampled source-view depth here to project back
    xyz_src = np.matmul(np.linalg.inv(intrinsics_src),
                        np.vstack((xy_src, np.ones_like(x_ref))) * sampled_depth_src.reshape([-1]))
    # reference 3D space
    xyz_reprojected = np.matmul(np.matmul(extrinsics_ref, np.linalg.inv(extrinsics_src)),
                                np.vstack((xyz_src, np.ones_like(x_ref))))[:3]
    # reference view x, y, depth
    depth_reprojected = xyz_reprojected[2].reshape([height, width]).astype(np.float32)
    K_xyz_reprojected = np.matmul(intrinsics_ref, xyz_reprojected)
    xy_reprojected = K_xyz_reprojected[:2] / K_xyz_reprojected[2:3]
    x_reprojected = xy_reprojected[0].reshape([height, width]).astype(np.float32)
    y_reprojected = xy_reprojected[1].reshape([height, width]).astype(np.float32)

    return depth_reprojected, x_reprojected, y_reprojected, x_src, y_src


def check_geometric_consistency(depth_ref, intrinsics_ref, extrinsics_ref,
                                depth_src, intrinsics_src, extrinsics_src,
                                thre1=0.5, thre2=0.01):
    width, height = depth_ref.shape[1], depth_ref.shape[0]
    x_ref, y_ref = np.meshgrid(np.arange(0, width), np.arange(0, height))
    depth_reprojected, x2d_reprojected, y2d_reprojected, x2d_src, y2d_src = reproject_with_depth(
        depth_ref, intrinsics_ref, extrinsics_ref, depth_src, intrinsics_src, extrinsics_src)
    # check |p_reproj - p_1| < thre1 (reprojection error in pixels)
    dist = np.sqrt((x2d_reprojected - x_ref) ** 2 + (y2d_reprojected - y_ref) ** 2)

    # check |d_reproj - d_1| / d_1 < thre2 (relative depth error)
    depth_diff = np.abs(depth_reprojected - depth_ref)
    relative_depth_diff = depth_diff / depth_ref

    mask = np.logical_and(dist < thre1, relative_depth_diff < thre2)
    # mask = dist < 0.2
    depth_reprojected[~mask] = 0

    return mask, depth_reprojected, x2d_src, y2d_src, relative_depth_diff
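
# A minimal usage sketch for the consistency check above (illustrative only;
# the argument names are assumptions). Each depth map is an (H, W) float32
# numpy array, each K a 3x3 intrinsics matrix, and each w2c a 4x4
# world-to-camera extrinsics matrix.
def _demo_check_geometric_consistency(depth_ref, K_ref, w2c_ref,
                                      depth_src, K_src, w2c_src):
    mask, depth_filtered, x2d_src, y2d_src, rel_diff = check_geometric_consistency(
        depth_ref, K_ref, w2c_ref, depth_src, K_src, w2c_src, thre1=0.5, thre2=0.01)
    # `mask` keeps pixels whose reprojection error is under thre1 pixels and
    # whose relative depth error is under thre2; inconsistent depths are zeroed
    return depth_filtered, float(mask.mean())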