# Hugging Face Space: fffiloni/Stand-In (Running on Zero)
# File size: 6,243 Bytes
# Revision: 26557da

import os
import cv2
import requests
import torch
import numpy as np
import PIL.Image
import PIL.ImageOps
from insightface.app import FaceAnalysis
from facexlib.parsing import init_parsing_model
from torchvision.transforms.functional import normalize
from typing import Union, Optional


def _img2tensor(img: np.ndarray, bgr2rgb: bool = True) -> torch.Tensor:
    """Convert an H x W x C image array into a C x H x W float32 tensor.

    Values are divided by 255, so 8-bit input ends up in the range [0, 1].

    Args:
        img: Image array with the channel axis last.
        bgr2rgb: When true, run OpenCV's BGR -> RGB conversion first.

    Returns:
        A ``torch.Tensor`` of dtype float32 with the channel axis first.
    """
    source = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) if bgr2rgb else img
    scaled = source.astype(np.float32) / 255.0
    # Move the channel axis to the front: (H, W, C) -> (C, H, W).
    channels_first = scaled.transpose(2, 0, 1)
    return torch.from_numpy(channels_first)


def _pad_to_square(img: np.ndarray, pad_color: int = 255) -> np.ndarray:
    """Pad the shorter spatial side of *img* with a constant until it is square.

    The padding is split evenly between both sides of the short axis; when the
    difference is odd, the extra row/column goes to the bottom/right.

    Works for 2-D (grayscale) arrays as well as arrays with any number of
    trailing channel axes; every channel is padded with ``pad_color``.

    Args:
        img: Image array whose first two axes are height and width.
        pad_color: Constant value written into the padded area.

    Returns:
        ``img`` itself when it is already square, otherwise a new padded array
        of the same dtype whose height and width are both ``max(h, w)``.
    """
    h, w = img.shape[:2]
    if h == w:
        return img

    diff = abs(h - w)
    before = diff // 2
    after = diff - before

    # Only the shorter spatial axis grows; all other axes get zero padding.
    pad_width = [(0, 0)] * img.ndim
    short_axis = 1 if h > w else 0
    pad_width[short_axis] = (before, after)

    return np.pad(img, pad_width, mode="constant", constant_values=pad_color)


class FaceProcessor:
    """Crop an image around its largest detected face and whiten the background.

    Face detection uses insightface's ``FaceAnalysis`` with the ``antelopev2``
    model pack; the background is found with facexlib's ``bisenet`` parsing
    model, whose class index 0 is treated as background.
    """

    def __init__(self, antelopv2_path=".", device: Optional[torch.device] = None):
        """Load the face detector and the face-parsing model.

        Args:
            antelopv2_path: Passed to ``FaceAnalysis`` as its ``root`` argument.
            device: Device for the parsing model; also selects the ONNX
                execution provider (CUDA vs CPU) for the detector. ``None``
                picks CUDA when available, otherwise CPU. Anything accepted
                by ``torch.device`` (including a plain string) is allowed.
        """
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        # torch.device() accepts strings as well as existing device objects,
        # so ``.type`` below is always available.
        self.device = torch.device(device)

        providers = (
            ["CUDAExecutionProvider"]
            if self.device.type == "cuda"
            else ["CPUExecutionProvider"]
        )
        self.app = FaceAnalysis(
            name="antelopev2", root=antelopv2_path, providers=providers
        )
        self.app.prepare(ctx_id=0, det_size=(640, 640))

        self.parsing_model = init_parsing_model(
            model_name="bisenet", device=self.device
        )
        self.parsing_model.eval()

        print("FaceProcessor initialized successfully.")

    @staticmethod
    def _load_image(image: Union[str, PIL.Image.Image]) -> PIL.Image.Image:
        """Return *image* as a PIL image, fetching or opening it if it is a string."""
        if isinstance(image, PIL.Image.Image):
            return image
        if not isinstance(image, str):
            raise TypeError(
                "Input must be a file path, a URL, or a PIL.Image.Image object."
            )

        if image.startswith(("http://", "https://")):
            from io import BytesIO

            response = requests.get(image, timeout=10)
            # Fail on 4xx/5xx here instead of handing an error page to PIL.
            response.raise_for_status()
            # ``content`` has transfer encodings (gzip/deflate) already decoded,
            # unlike the raw socket stream.
            return PIL.Image.open(BytesIO(response.content))

        if os.path.isfile(image):
            return PIL.Image.open(image)

        raise ValueError(f"Input string is not a valid URL or file path: {image}")

    def _select_region(
        self, frame: np.ndarray, border_thresh: int, face_crop_scale: float
    ) -> np.ndarray:
        """Return a square BGR region of *frame* around the largest detected face.

        Falls back to the whole frame (padded to square) when no face is found,
        when the face box reaches all four borders, or when the crop is empty.
        """
        faces = self.app.get(frame)
        if not faces:
            print(
                "[Warning] No face detected. Using the whole image, padded to square."
            )
            return _pad_to_square(frame, pad_color=255)

        h, w = frame.shape[:2]
        largest_face = max(
            faces, key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1])
        )
        x1, y1, x2, y2 = map(int, largest_face.bbox)

        # NOTE(review): all four edges must be within the threshold, i.e. the
        # face box spans nearly the whole frame. Confirm that ``and`` (rather
        # than ``or``) is the intended condition.
        is_close_to_border = (
            x1 <= border_thresh
            and y1 <= border_thresh
            and x2 >= w - border_thresh
            and y2 >= h - border_thresh
        )
        if is_close_to_border:
            print("[Info] Face is close to border, padding original image to square.")
            return _pad_to_square(frame, pad_color=255)

        # Square window centred on the face box, enlarged by face_crop_scale
        # and clamped to the frame.
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
        side = int(max(x2 - x1, y2 - y1) * face_crop_scale)
        half = side // 2

        left = max(cx - half, 0)
        top = max(cy - half, 0)
        right = min(cx + half, w)
        bottom = min(cy + half, h)

        cropped_face = frame[top:bottom, left:right]
        if cropped_face.size == 0:
            # A degenerate box would otherwise make cv2.resize fail downstream.
            print(
                "[Warning] Face crop is empty. Using the whole image, padded to square."
            )
            return _pad_to_square(frame, pad_color=255)

        return _pad_to_square(cropped_face, pad_color=255)

    def _background_mask(self, image_bgr: np.ndarray) -> np.ndarray:
        """Return a boolean H x W array that is True where the parser predicts class 0."""
        face_tensor = (
            _img2tensor(image_bgr, bgr2rgb=True).unsqueeze(0).to(self.device)
        )
        with torch.no_grad():
            # (x - 0.5) / 0.5 maps the [0, 1] tensor to [-1, 1].
            normalized_face = normalize(face_tensor, [0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
            parsing_out = self.parsing_model(normalized_face)[0]
            # Index the single batch element instead of squeeze(), so the
            # result keeps two axes even for 1-pixel sides.
            parsing_mask = parsing_out.argmax(dim=1)[0]

        return parsing_mask.cpu().numpy() == 0

    def process(
        self,
        image: Union[str, PIL.Image.Image],
        resize_to: int = 512,
        border_thresh: int = 10,
        face_crop_scale: float = 1.5,
        extra_input: bool = False,
    ) -> Union[PIL.Image.Image, tuple]:
        """Produce a square face image with the background replaced by white.

        Args:
            image: A PIL image, a local file path, or an ``http(s)://`` URL.
            resize_to: Side length in pixels of the square output.
            border_thresh: Pixel tolerance used when testing whether the face
                box reaches the image borders.
            face_crop_scale: Factor by which the longer face-box side is
                enlarged to obtain the crop window.
            extra_input: When true, also return an RGBA version whose alpha is
                0 on the background and 255 elsewhere.

        Returns:
            The white-background RGB image, or the tuple
            ``(white_background_image, transparent_background_image)`` when
            ``extra_input`` is true.

        Raises:
            TypeError: If ``image`` is neither a string nor a PIL image.
            ValueError: If ``image`` is a string that is not a URL or a file.
            requests.HTTPError: If the URL responds with an error status.
        """
        image = self._load_image(image)
        # Apply the EXIF orientation before any geometry is computed.
        image = PIL.ImageOps.exif_transpose(image).convert("RGB")

        frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
        image_to_process = self._select_region(frame, border_thresh, face_crop_scale)

        image_resized = cv2.resize(
            image_to_process, (resize_to, resize_to), interpolation=cv2.INTER_AREA
        )

        # Computed once and shared by both outputs.
        is_background = self._background_mask(image_resized)
        image_resized_rgb = cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB)

        white_bg_rgb = image_resized_rgb.copy()
        white_bg_rgb[is_background] = 255
        img_white_bg = PIL.Image.fromarray(white_bg_rgb)

        if not extra_input:
            return img_white_bg

        # Alpha channel: 255 for foreground (not background), 0 for background.
        alpha_channel = np.where(is_background, 0, 255).astype(np.uint8)
        rgba_image = np.dstack((image_resized_rgb, alpha_channel))
        # An H x W x 4 uint8 array is read as RGBA without an explicit mode.
        img_transparent_bg = PIL.Image.fromarray(rgba_image)

        return img_white_bg, img_transparent_bg