import random
from typing import List

import cv2
import numpy as np
import torch
from torchvision.transforms.functional import resize

from ripe import utils

log = utils.get_pylogger(__name__)


def gridify(x, window_size):
    """Split a BxCxHxW tensor into non-overlapping square windows.

    Each window of ``window_size x window_size`` values is flattened into the
    last dimension of the result.

    Params:
        x: Input tensor of shape BxCxHxW
        window_size: Side length of the (square) window
    Returns:
        x: Output tensor of shape BxCx(H//window_size)x(W//window_size)x(window_size**2)
    """
    assert x.dim() == 4, "Input tensor x must have 4 dimensions"
    B, C, H, W = x.shape
    x = (
        x.unfold(2, window_size, window_size)
        .unfold(3, window_size, window_size)
        .reshape(B, C, H // window_size, W // window_size, window_size**2)
    )
    return x


def get_grid(B, H, W, device):
    """Create a grid of normalized coordinates for every batch element.

    Along an axis with ``n`` samples the coordinates run from ``-1 + 1/n`` to
    ``1 - 1/n``.

    Params:
        B: Batch size
        H: Grid height
        W: Grid width
        device: Device on which the grid is created
    Returns:
        x1_n: Tensor of shape Bx(H*W)x2; the last dimension holds the coordinate
            along W followed by the coordinate along H.
    """
    axes = [torch.linspace(-1 + 1 / n, 1 - 1 / n, n, device=device) for n in (B, H, W)]
    # "ij" is the historical default; passing it explicitly keeps the result
    # unchanged while avoiding the deprecation warning of recent torch versions.
    x1_n = torch.meshgrid(*axes, indexing="ij")
    x1_n = torch.stack((x1_n[2], x1_n[1]), dim=-1).reshape(B, H * W, 2)
    return x1_n


def cv2_matches_from_kornia(match_dists: torch.Tensor, match_idxs: torch.Tensor) -> List[cv2.DMatch]:
    """Convert match distances and index pairs into a list of ``cv2.DMatch``.

    Params:
        match_dists: One distance per match.
        match_idxs: One index pair per match; element 0 and element 1 of each
            pair are passed as the first and second ``cv2.DMatch`` index.
    Returns:
        List with one ``cv2.DMatch`` per (index pair, distance) tuple.
    """
    return [cv2.DMatch(idx[0].item(), idx[1].item(), d.item()) for idx, d in zip(match_idxs, match_dists)]


def to_cv_kpts(kpts, scores):
    """Convert keypoints and scores into a list of ``cv2.KeyPoint``.

    Coordinates are cast to int16 before conversion, so sub-pixel positions are
    truncated. Every keypoint gets size 6 and angle 0; the score is used as the
    keypoint response.

    Params:
        kpts: Tensor of keypoints; columns 0 and 1 are used as the coordinates.
        scores: Tensor with one score per keypoint.
    Returns:
        cv_kp: List of ``cv2.KeyPoint``.
    """
    kp = kpts.cpu().numpy().astype(np.int16)
    s = scores.cpu().numpy()
    cv_kp = [cv2.KeyPoint(kp[i][0], kp[i][1], 6, 0, s[i]) for i in range(len(kp))]
    return cv_kp


def resize_image(image, min_size=512, max_size=768):
    """Resize image to a new size while maintaining the aspect ratio.

    The larger image dimension is clamped to ``[min_size, max_size]``; the other
    dimension follows from the aspect ratio (truncated to an integer) and may
    therefore end up smaller than ``min_size``.

    Params:
        image (torch.tensor): Image to be resized; the last two dimensions are
            treated as height and width.
        min_size (int): Lower bound for the larger dimension.
        max_size (int): Upper bound for the larger dimension.
    Returns:
        image: Resized image.
    """
    h, w = image.shape[-2:]
    aspect_ratio = w / h
    if w > h:
        new_w = max(min_size, min(max_size, w))
        new_h = int(new_w / aspect_ratio)
    else:
        new_h = max(min_size, min(max_size, h))
        new_w = int(new_h * aspect_ratio)
    new_size = (new_h, new_w)
    image = resize(image, new_size)
    return image


def get_rewards(
    reward,
    kps1,
    kps2,
    selected_mask1,
    selected_mask2,
    padding_mask1,
    padding_mask2,
    rel_idx_matches,
    abs_idx_matches,
    ransac_inliers,
    label,
    penalty=0.0,
    use_whitening=False,
    selected_only=False,
    filter_mode=None,
):
    """Build the dense reward matrix for all keypoint pairs of an image pair.

    The matrix starts as zeros of shape ``(len(kps1), len(kps2))``. Entries of
    matches flagged by ``ransac_inliers`` receive ``reward`` (negated when
    ``label`` is falsy). The computation runs under ``torch.no_grad()``.

    Params:
        reward: Reward magnitude (number or tensor). It is not modified.
        kps1, kps2: Keypoints of both images; only their length and the device
            of ``kps1`` are used.
        selected_mask1, selected_mask2: Masks used to sub-select rows/columns
            when ``selected_only`` is set.
        padding_mask1, padding_mask2: Per-keypoint masks; presumably True for
            keypoints inside the real image area (not padding) - confirm with caller.
        rel_idx_matches: Unused in this function.
        abs_idx_matches: Match indices; column 0 indexes ``kps1`` and column 1
            indexes ``kps2``.
        ransac_inliers: Mask over the matches marking the inliers (assumed to
            be a boolean tensor - confirm with caller).
        label: Truthy for a positive pair, falsy for a negative pair.
        penalty: If non-zero, every entry that is still exactly 0.0 is set to
            ``penalty`` (positive pair) or ``-penalty`` (negative pair).
        use_whitening: If set, the result is normalized to zero mean and unit
            standard deviation (with an epsilon of 1e-6).
        selected_only: If set, only the rows/columns picked by the selected
            masks are returned.
        filter_mode: "ignore" rewards all inlier matches and (unless
            ``selected_only``) drops rows/columns via the padding masks;
            "punish" rewards inlier matches where both padding masks are set
            and assigns -1.0 to the remaining inlier matches.
    Returns:
        dense_returns: The reward matrix.
    Raises:
        ValueError: If ``filter_mode`` is neither "ignore" nor "punish".
    """
    with torch.no_grad():
        # Work on a local value: an in-place ``reward *= ...`` would silently flip
        # the sign of a tensor owned by the caller.
        signed_reward = reward if label else -reward

        dense_returns = torch.zeros((len(kps1), len(kps2)), device=kps1.device)
        idx1 = abs_idx_matches[:, 0]
        idx2 = abs_idx_matches[:, 1]

        if filter_mode == "ignore":
            dense_returns[idx1[ransac_inliers], idx2[ransac_inliers]] = signed_reward
        elif filter_mode == "punish":
            # both in the image area (not in padding area)
            both_valid = padding_mask1[idx1] & padding_mask2[idx2]
            rewarded = ransac_inliers & both_valid
            punished = ransac_inliers & ~both_valid

            dense_returns[idx1[rewarded], idx2[rewarded]] = signed_reward
            dense_returns[idx1[punished], idx2[punished]] = -1.0
        else:
            raise ValueError(f"Unknown filter mode: {filter_mode}")

        if selected_only:
            dense_returns = dense_returns[selected_mask1, :][:, selected_mask2]
        if filter_mode == "ignore" and not selected_only:
            dense_returns = dense_returns[padding_mask1, :][:, padding_mask2]

        if penalty != 0.0:
            # pos. pair: small penalty for not finding a match
            # neg. pair: small reward for not finding a match
            penalty_val = penalty if label else -penalty
            dense_returns[dense_returns == 0.0] = penalty_val

        if use_whitening:
            dense_returns = (dense_returns - dense_returns.mean()) / (dense_returns.std() + 1e-6)

        return dense_returns


def get_other_random_id(idx: int, len_dataset: int, min_dist: int = 20):
    """Draw a random dataset index that is at least ``min_dist`` away from ``idx``.

    The index is drawn uniformly from all valid candidates with a single random
    draw, so the call only fails when no valid candidate exists.

    Params:
        idx: Index of the source image.
        len_dataset: Number of items in the dataset.
        min_dist: Minimum absolute distance between ``idx`` and the result.
    Returns:
        tgt_id: Index in ``[0, len_dataset - 1]`` with ``abs(idx - tgt_id) >= min_dist``.
    Raises:
        ValueError: If no index satisfies the distance constraint.
    """
    if min_dist <= 0:
        # Every index qualifies, so the two ranges below would overlap.
        low_count = max(0, len_dataset)
        high_start = 0
        high_count = 0
    else:
        # Valid ids below idx: 0 .. idx - min_dist (clamped to the dataset).
        low_count = max(0, min(idx - min_dist, len_dataset - 1) + 1)
        # Valid ids above idx: idx + min_dist .. len_dataset - 1.
        high_start = max(idx + min_dist, 0)
        high_count = max(0, len_dataset - high_start)

    total = low_count + high_count
    if total == 0:
        raise ValueError(f"Could not find target image with distance >= {min_dist} from source image {idx}")

    pick = random.randrange(total)
    if pick < low_count:
        return pick
    return high_start + (pick - low_count)


def cv_resize_and_pad_to_shape(image, new_shape, padding_color=(0, 0, 0)):
    """Resizes image to new_shape with maintaining the aspect ratio and pads with padding_color if needed.

    The image is scaled so that it fits inside ``new_shape`` and the missing
    rows/columns are split between both sides; when the missing amount is odd,
    the extra pixel goes to the bottom/right.

    Params:
        image: Image to be resized; its first two dimensions are height and width.
        new_shape: Expected (height, width) of new image.
        padding_color: Tuple in BGR of padding color
    Returns:
        image: Resized image with padding
    """
    h, w = image.shape[:2]

    scale_h = new_shape[0] / h
    scale_w = new_shape[1] / w

    if scale_w * h > new_shape[0]:
        # Scaling to the full width would overflow the height.
        scale = scale_h
    elif scale_h * w > new_shape[1]:
        # Scaling to the full height would overflow the width.
        scale = scale_w
    else:
        scale = max(scale_h, scale_w)

    new_w, new_h = int(round(w * scale)), int(round(h * scale))

    image = cv2.resize(image, (new_w, new_h))

    missing_h = new_shape[0] - new_h
    missing_w = new_shape[1] - new_w

    top, bottom = missing_h // 2, missing_h - (missing_h // 2)
    left, right = missing_w // 2, missing_w - (missing_w // 2)

    image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=padding_color)
    return image