import torch import numpy as np import cv2 import math from ..models import FluxFillPipeline def get_smooth_mask(general_mask, ksize=(120, 120)): r"""Generate a smooth mask from the general mask using morphological dilation. Args: general_mask (np.ndarray): The input mask to be smoothed, expected to be a binary mask with shape [H, W] and dtype uint8 (0 or 1). ksize (tuple): The size of the structuring element used for dilation, specified as (height, width). Default is (120, 120). Returns: np.ndarray: The smoothed mask, with the same shape as the input mask, where the values are either 0 or 1 (uint8). """ # Ensure kernel size is a tuple of integers ksize = (int(ksize[0]), int(ksize[1])) # Create rectangular structuring element for dilation kernel = cv2.getStructuringElement(cv2.MORPH_RECT, ksize) # Apply dilation to expand mask regions mask_array = cv2.dilate(general_mask.astype( np.uint8), kernel) # [1024, 2048] uint8 1 # Convert back to binary mask mask_array = (mask_array > 0).astype(np.uint8) return mask_array def build_inpaint_model(model_path, lora_path, subfolder, device=0): r"""Build the inpainting model pipeline. Args: model_path (str): The path to the pre-trained model. lora_path (str): The path to the LoRA weights. device (int): The device ID to load the model onto (default: 0). Returns: pipe: The inpainting pipeline object. """ # Initialize pipeline with bfloat16 precision for memory efficiency pipe = FluxFillPipeline.from_pretrained( model_path, torch_dtype=torch.bfloat16).to(f"cuda:{device}") pipe.load_lora_weights( lora_path, subfolder=subfolder, weight_name="lora.safetensors", # default weight name torch_dtype=torch.bfloat16 ) pipe.enable_model_cpu_offload() # save some VRAM by offloading the model to CPU pipe.device_id = device return pipe def get_adaptive_smooth_mask_ksize_ctrl(general_masks, mask_infos, basek=100, threshold=10000, r=1): r"""Generate a smooth mask with adaptive kernel size control based on mask area. Args: general_masks (np.ndarray): The input mask array, expected to be a 2D array of shape [H, W] where each pixel value corresponds to a mask ID. mask_infos (list): A list of dictionaries containing information about each mask, including the area and label of the mask. basek (int): The base kernel size for smoothing, default is 100. threshold (int): The area threshold to determine the scaling factor for the kernel size, default is 10000. r (int): A scaling factor for the kernel size, default is 1. Returns: np.ndarray: The smoothed mask array, with the same shape as the input mask, where the values are either 0 or 1 (uint8). """ # Initialize output mask mask_array = np.zeros_like(general_masks).astype(np.bool_) # Process each mask region individually for i in range(len(mask_infos)): mask_info = mask_infos[i] area = mask_info["area"] # Calculate size ratio with threshold clamping ratio = area / threshold ratio = math.sqrt(min(ratio, 1.0)) # Extract current object mask mask = (general_masks == i + 1).astype(np.uint8) # Default kernel for other objects mask = get_smooth_mask(mask, ksize=( int(basek*ratio)*r, int((basek+10)*ratio)*r)).astype(np.bool_) # Combine with existing masks mask_array = np.logical_or(mask_array, mask) return mask_array.astype(np.uint8)