Spaces:

Realcat
/

image-matching-webui

Running on Zero

File size: 5,639 Bytes

e6ac593

import random
from typing import List

import cv2
import numpy as np
import torch
from torchvision.transforms.functional import resize

from ripe import utils

log = utils.get_pylogger(__name__)


def gridify(x, window_size):
    """Turn a tensor of BxCxHxW into a tensor of
    BxCx(H//window_size)x(W//window_size)x(window_size**2)

    Params:
        x: Input tensor of shape BxCxHxW
        window_size: Size of the window

    Returns:
        x: Output tensor of shape BxCx(H//window_size)x(W//window_size)x(window_size**2)
    """

    assert x.dim() == 4, "Input tensor x must have 4 dimensions"

    B, C, H, W = x.shape
    x = (
        x.unfold(2, window_size, window_size)
        .unfold(3, window_size, window_size)
        .reshape(B, C, H // window_size, W // window_size, window_size**2)
    )

    return x


def get_grid(B, H, W, device):
    x1_n = torch.meshgrid(*[torch.linspace(-1 + 1 / n, 1 - 1 / n, n, device=device) for n in (B, H, W)])
    x1_n = torch.stack((x1_n[2], x1_n[1]), dim=-1).reshape(B, H * W, 2)
    return x1_n


def cv2_matches_from_kornia(match_dists: torch.Tensor, match_idxs: torch.Tensor) -> List[cv2.DMatch]:
    return [cv2.DMatch(idx[0].item(), idx[1].item(), d.item()) for idx, d in zip(match_idxs, match_dists)]


def to_cv_kpts(kpts, scores):
    kp = kpts.cpu().numpy().astype(np.int16)
    s = scores.cpu().numpy()

    cv_kp = [cv2.KeyPoint(kp[i][0], kp[i][1], 6, 0, s[i]) for i in range(len(kp))]

    return cv_kp


def resize_image(image, min_size=512, max_size=768):
    """Resize image to a new size while maintaining the aspect ratio.

    Params:
        image (torch.tensor): Image to be resized.
        min_size (int): Minimum size of the smaller dimension.
        max_size (int): Maximum size of the larger dimension.

    Returns:
        image: Resized image.
    """

    h, w = image.shape[-2:]

    aspect_ratio = w / h

    if w > h:
        new_w = max(min_size, min(max_size, w))
        new_h = int(new_w / aspect_ratio)
    else:
        new_h = max(min_size, min(max_size, h))
        new_w = int(new_h * aspect_ratio)

    new_size = (new_h, new_w)

    image = resize(image, new_size)

    return image


def get_rewards(
    reward,
    kps1,
    kps2,
    selected_mask1,
    selected_mask2,
    padding_mask1,
    padding_mask2,
    rel_idx_matches,
    abs_idx_matches,
    ransac_inliers,
    label,
    penalty=0.0,
    use_whitening=False,
    selected_only=False,
    filter_mode=None,
):
    with torch.no_grad():
        reward *= 1.0 if label else -1.0

        dense_returns = torch.zeros((len(kps1), len(kps2)), device=kps1.device)

        if filter_mode == "ignore":
            dense_returns[
                abs_idx_matches[:, 0][ransac_inliers],
                abs_idx_matches[:, 1][ransac_inliers],
            ] = reward
        elif filter_mode == "punish":
            in_padding_area = (
                padding_mask1[abs_idx_matches[:, 0]] & padding_mask2[abs_idx_matches[:, 1]]
            )  # both in the image area (not in padding area)

            dense_returns[
                abs_idx_matches[:, 0][ransac_inliers & in_padding_area],
                abs_idx_matches[:, 1][ransac_inliers & in_padding_area],
            ] = reward
            dense_returns[
                abs_idx_matches[:, 0][ransac_inliers & ~in_padding_area],
                abs_idx_matches[:, 1][ransac_inliers & ~in_padding_area],
            ] = -1.0
        else:
            raise ValueError(f"Unknown filter mode: {filter_mode}")

        if selected_only:
            dense_returns = dense_returns[selected_mask1, :][:, selected_mask2]
        if filter_mode == "ignore" and not selected_only:
            dense_returns = dense_returns[padding_mask1, :][:, padding_mask2]

        if penalty != 0.0:
            # pos. pair: small penalty for not finding a match
            # neg. pair: small reward for not finding a match
            penalty_val = penalty if label else -penalty

            dense_returns[dense_returns == 0.0] = penalty_val

        if use_whitening:
            dense_returns = (dense_returns - dense_returns.mean()) / (dense_returns.std() + 1e-6)

    return dense_returns


def get_other_random_id(idx: int, len_dataset: int, min_dist: int = 20):
    for _ in range(10):
        tgt_id = random.randint(0, len_dataset - 1)
        if abs(idx - tgt_id) >= min_dist:
            return tgt_id

    raise ValueError(f"Could not find target image with distance >= {min_dist} from source image {idx}")


def cv_resize_and_pad_to_shape(image, new_shape, padding_color=(0, 0, 0)):
    """Resizes image to new_shape with maintaining the aspect ratio and pads with padding_color if
    needed.

    Params:
        image: Image to be resized.
        new_shape: Expected (height, width) of new image.
        padding_color: Tuple in BGR of padding color
    Returns:
        image: Resized image with padding
    """
    h, w = image.shape[:2]

    scale_h = new_shape[0] / h
    scale_w = new_shape[1] / w

    scale = None
    if scale_w * h > new_shape[0]:
        scale = scale_h
    elif scale_h * w > new_shape[1]:
        scale = scale_w
    else:
        scale = max(scale_h, scale_w)

    new_w, new_h = int(round(w * scale)), int(round(h * scale))

    image = cv2.resize(image, (new_w, new_h))

    missing_h = new_shape[0] - new_h
    missing_w = new_shape[1] - new_w

    top, bottom = missing_h // 2, missing_h - (missing_h // 2)
    left, right = missing_w // 2, missing_w - (missing_w // 2)

    image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=padding_color)
    return image