import random
from typing import List

import cv2
import numpy as np
import torch
from torchvision.transforms.functional import resize

from ripe import utils

log = utils.get_pylogger(__name__)

def gridify(x, window_size):
    """Turn a tensor of BxCxHxW into a tensor of
    BxCx(H//window_size)x(W//window_size)x(window_size**2).

    Params:
        x: Input tensor of shape BxCxHxW
        window_size: Size of the (square, non-overlapping) window

    Returns:
        x: Output tensor of shape BxCx(H//window_size)x(W//window_size)x(window_size**2)
    """
    assert x.dim() == 4, "Input tensor x must have 4 dimensions"

    B, C, H, W = x.shape

    # Unfold H and W into non-overlapping windows, then flatten each
    # window_size x window_size patch into the last dimension.
    x = (
        x.unfold(2, window_size, window_size)
        .unfold(3, window_size, window_size)
        .reshape(B, C, H // window_size, W // window_size, window_size**2)
    )

    return x
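
# Example (illustrative sketch, sizes chosen arbitrarily): an 8x8 window over a
# 2x3x64x64 batch yields 8x8 window positions with 64 pixels each.
#   >>> x = torch.randn(2, 3, 64, 64)
#   >>> gridify(x, window_size=8).shape
#   torch.Size([2, 3, 8, 8, 64])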

def get_grid(B, H, W, device):
    """Create a dense grid of normalized (x, y) coordinates in (-1, 1) for a BxHxW batch."""
    # indexing="ij" matches the legacy meshgrid default and silences the deprecation warning.
    x1_n = torch.meshgrid(
        *[torch.linspace(-1 + 1 / n, 1 - 1 / n, n, device=device) for n in (B, H, W)],
        indexing="ij",
    )
    # Stack as (x, y), i.e. (W-coordinate, H-coordinate), and flatten the spatial dims.
    x1_n = torch.stack((x1_n[2], x1_n[1]), dim=-1).reshape(B, H * W, 2)
    return x1_n
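
# Example (illustrative sketch): coordinates are normalized to (-1, 1) in
# (x, y) order, one flattened H*W grid per batch element.
#   >>> grid = get_grid(2, 4, 4, torch.device("cpu"))
#   >>> grid.shape
#   torch.Size([2, 16, 2])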

def cv2_matches_from_kornia(match_dists: torch.Tensor, match_idxs: torch.Tensor) -> List[cv2.DMatch]:
    """Convert kornia-style match distances and index pairs into a list of cv2.DMatch."""
    return [cv2.DMatch(idx[0].item(), idx[1].item(), d.item()) for idx, d in zip(match_idxs, match_dists)]
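
# Example (sketch; assumes the (dists, idxs) layout returned by kornia matchers
# such as kornia.feature.match_nn, i.e. idxs is Nx2 and dists is Nx1):
#   >>> dists = torch.tensor([[0.2], [0.5]])
#   >>> idxs = torch.tensor([[0, 3], [1, 7]])
#   >>> matches = cv2_matches_from_kornia(dists, idxs)  # two cv2.DMatch objects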

def to_cv_kpts(kpts, scores):
    """Convert keypoint coordinates and scores to a list of cv2.KeyPoint."""
    kp = kpts.cpu().numpy().astype(np.int16)
    s = scores.cpu().numpy()

    # size=6 and angle=0 are fixed placeholders; the score is stored as the response.
    cv_kp = [cv2.KeyPoint(kp[i][0], kp[i][1], 6, 0, s[i]) for i in range(len(kp))]

    return cv_kp
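
# Example (illustrative sketch): coordinates are truncated to integer pixels.
#   >>> kpts = torch.tensor([[10.7, 20.2], [30.0, 40.0]])
#   >>> scores = torch.tensor([0.9, 0.7])
#   >>> cv_kpts = to_cv_kpts(kpts, scores)  # list of two cv2.KeyPoint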

def resize_image(image, min_size=512, max_size=768):
    """Resize an image while maintaining its aspect ratio.

    The larger dimension is clamped to the range [min_size, max_size]; the
    smaller dimension follows from the aspect ratio.

    Params:
        image (torch.Tensor): Image to be resized.
        min_size (int): Minimum size of the larger dimension.
        max_size (int): Maximum size of the larger dimension.

    Returns:
        image: Resized image.
    """
    h, w = image.shape[-2:]
    aspect_ratio = w / h

    if w > h:
        new_w = max(min_size, min(max_size, w))
        new_h = int(new_w / aspect_ratio)
    else:
        new_h = max(min_size, min(max_size, h))
        new_w = int(new_h * aspect_ratio)

    new_size = (new_h, new_w)
    image = resize(image, new_size)

    return image
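
# Example (illustrative sketch): a 1024x2048 image has its larger side clamped
# to max_size=768, so the result is 384x768.
#   >>> img = torch.rand(3, 1024, 2048)
#   >>> resize_image(img).shape
#   torch.Size([3, 384, 768])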

def get_rewards(
    reward,
    kps1,
    kps2,
    selected_mask1,
    selected_mask2,
    padding_mask1,
    padding_mask2,
    rel_idx_matches,
    abs_idx_matches,
    ransac_inliers,
    label,
    penalty=0.0,
    use_whitening=False,
    selected_only=False,
    filter_mode=None,
):
    """Distribute a scalar reward over a dense NxM return matrix for a keypoint pair.

    RANSAC-inlier matches receive the (sign-adjusted) reward; matches touching
    the padding area are either ignored or punished, depending on filter_mode.
    """
    with torch.no_grad():
        # Positive pairs are rewarded, negative pairs penalized.
        reward *= 1.0 if label else -1.0

        dense_returns = torch.zeros((len(kps1), len(kps2)), device=kps1.device)

        if filter_mode == "ignore":
            dense_returns[
                abs_idx_matches[:, 0][ransac_inliers],
                abs_idx_matches[:, 1][ransac_inliers],
            ] = reward
        elif filter_mode == "punish":
            both_in_image = (
                padding_mask1[abs_idx_matches[:, 0]] & padding_mask2[abs_idx_matches[:, 1]]
            )  # both keypoints in the image area (not in the padding area)

            dense_returns[
                abs_idx_matches[:, 0][ransac_inliers & both_in_image],
                abs_idx_matches[:, 1][ransac_inliers & both_in_image],
            ] = reward
            # Inlier matches with a keypoint in the padding area are actively punished.
            dense_returns[
                abs_idx_matches[:, 0][ransac_inliers & ~both_in_image],
                abs_idx_matches[:, 1][ransac_inliers & ~both_in_image],
            ] = -1.0
        else:
            raise ValueError(f"Unknown filter mode: {filter_mode}")

        if selected_only:
            dense_returns = dense_returns[selected_mask1, :][:, selected_mask2]

        if filter_mode == "ignore" and not selected_only:
            dense_returns = dense_returns[padding_mask1, :][:, padding_mask2]

        if penalty != 0.0:
            # pos. pair: small penalty for not finding a match
            # neg. pair: small reward for not finding a match
            penalty_val = penalty if label else -penalty
            dense_returns[dense_returns == 0.0] = penalty_val

        if use_whitening:
            dense_returns = (dense_returns - dense_returns.mean()) / (dense_returns.std() + 1e-6)

        return dense_returns
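
# Minimal sketch with toy tensors (all values are made-up assumptions): one
# inlier match on a positive pair, no padding, filter_mode="ignore".
#   >>> kps1, kps2 = torch.zeros(4, 2), torch.zeros(5, 2)
#   >>> pad1 = torch.ones(4, dtype=torch.bool)
#   >>> pad2 = torch.ones(5, dtype=torch.bool)
#   >>> matches = torch.tensor([[0, 1], [2, 3]])
#   >>> inliers = torch.tensor([True, False])
#   >>> returns = get_rewards(
#   ...     1.0, kps1, kps2, pad1, pad2, pad1, pad2,
#   ...     None, matches, inliers, label=True, filter_mode="ignore",
#   ... )
#   >>> returns.shape  # dense NxM return matrix; only returns[0, 1] == 1.0
#   torch.Size([4, 5])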

def get_other_random_id(idx: int, len_dataset: int, min_dist: int = 20):
    """Sample a random dataset index at least min_dist away from idx (up to 10 attempts)."""
    for _ in range(10):
        tgt_id = random.randint(0, len_dataset - 1)
        if abs(idx - tgt_id) >= min_dist:
            return tgt_id

    raise ValueError(f"Could not find target image with distance >= {min_dist} from source image {idx}")
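
# Example (illustrative sketch): sample a negative-pair index for dataset
# item 5; raises ValueError only if all 10 attempts land within min_dist.
#   >>> tgt = get_other_random_id(5, len_dataset=1000, min_dist=20)
#   >>> abs(5 - tgt) >= 20
#   True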

def cv_resize_and_pad_to_shape(image, new_shape, padding_color=(0, 0, 0)):
    """Resize an image to new_shape while maintaining the aspect ratio, padding
    with padding_color if needed.

    Params:
        image: Image to be resized.
        new_shape: Expected (height, width) of the new image.
        padding_color: Padding color as a BGR tuple.

    Returns:
        image: Resized image with padding.
    """
    h, w = image.shape[:2]

    scale_h = new_shape[0] / h
    scale_w = new_shape[1] / w

    # Pick the largest scale that still fits the image inside new_shape.
    if scale_w * h > new_shape[0]:
        scale = scale_h
    elif scale_h * w > new_shape[1]:
        scale = scale_w
    else:
        scale = max(scale_h, scale_w)

    new_w, new_h = int(round(w * scale)), int(round(h * scale))
    image = cv2.resize(image, (new_w, new_h))

    # Distribute the remaining border evenly on both sides.
    missing_h = new_shape[0] - new_h
    missing_w = new_shape[1] - new_w
    top, bottom = missing_h // 2, missing_h - (missing_h // 2)
    left, right = missing_w // 2, missing_w - (missing_w // 2)

    image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=padding_color)
    return image
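
# Example (illustrative sketch): a 100x200 BGR image fit into a 256x256 canvas
# is scaled to 128x256 and padded with 64 black rows on top and bottom.
#   >>> img = np.zeros((100, 200, 3), dtype=np.uint8)
#   >>> cv_resize_and_pad_to_shape(img, (256, 256)).shape
#   (256, 256, 3)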