Spaces:
Running
on
Zero
Running
on
Zero
""" | |
Helper functions mainly for data preparation.
""" | |
import cv2 | |
import numpy as np | |
from PIL import Image | |
# Extract the image and mask from gr.ImageEditor.
def process_gradio_source(editor_data):
    """Split an image-editor payload into image, mask and masked pixels.

    Args:
        editor_data: Mapping with a ``"background"`` image and an optional
            ``"layers"`` list of images. Only the first layer is used; its
            alpha channel is interpreted as the drawn mask.

    Returns:
        A tuple ``(original_image, full_mask, cropped_region)``:

        * ``original_image`` -- the background converted to RGB.
        * ``full_mask`` -- an ``"L"`` image of the same size holding only
          0 or 255 (alpha values above 128 become 255).
        * ``cropped_region`` -- an RGB image that keeps the background
          pixels under the mask and is black everywhere else.

        When there are no layers the mask is all zeros and
        ``cropped_region`` is a plain copy of the background.
    """
    original_image = editor_data["background"].convert("RGB")
    layers = editor_data.get("layers", [])

    # Defaults used when no layer exists or when mask extraction fails.
    full_mask = Image.new("L", original_image.size, 0)
    cropped_region = original_image.copy()

    if not layers:
        # Keep the same ordering as the final return so callers can always
        # unpack the result as (image, mask, cropped).
        return original_image, full_mask, cropped_region

    try:
        layer_image = layers[0]
        if layer_image.mode != "RGBA":
            layer_image = layer_image.convert("RGBA")
        alpha_channel = layer_image.split()[-1]

        # The layer is placed at the top-left corner of the background.
        full_mask.paste(alpha_channel, (0, 0))
        # Binarize the alpha values so the mask contains only 0 and 255.
        full_mask = full_mask.point(lambda p: 255 if p > 128 else 0)

        original_np = np.array(original_image)
        mask_bool = np.array(full_mask) > 0
        cropped_array = np.zeros_like(original_np)
        cropped_array[mask_bool] = original_np[mask_bool]
        cropped_region = Image.fromarray(cropped_array)
    except Exception as e:
        # Deliberate best effort: report the problem and fall through with
        # whatever values were computed before the failure.
        print(f"Raise error: {str(e)}")

    return original_image, full_mask, cropped_region
# Get bounding box from the mask.
def get_bbox_from_mask(mask_image):
    """Return the bounding box of the largest external contour in a mask.

    Args:
        mask_image: Mask convertible with ``np.array``. A 3-dimensional
            array is converted to grayscale with ``cv2.COLOR_RGB2GRAY``
            first.

    Returns:
        ``((x, y), (x + w, y + h))`` -- the top-left corner and the
        bottom-right corner of the bounding rectangle of the contour with
        the largest area, or ``None`` when no contour is found.
    """
    mask_array = np.array(mask_image)
    if mask_array.ndim == 3:
        mask_array = cv2.cvtColor(mask_array, cv2.COLOR_RGB2GRAY)

    # Pixels above 127 become 255, everything else 0.
    _, binary_mask = cv2.threshold(mask_array, 127, 255, cv2.THRESH_BINARY)

    # Search the thresholded mask: findContours treats every non-zero pixel
    # as foreground, so the raw array would let faint pixels enlarge the box.
    contours, _ = cv2.findContours(
        binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if not contours:
        return None

    largest_contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest_contour)
    return (x, y), (x + w, y + h)
# Crop image from the bounding box coord.
def crop_image_from_bb(original_image, top_left, bottom_right):
    """Cut the rectangle spanned by two corner points out of an image.

    Args:
        original_image: Image exposing ``width``, ``height`` and ``crop``.
        top_left: ``(x, y)`` of the upper-left corner; values are cast to int.
        bottom_right: ``(x, y)`` of the lower-right corner; values are cast
            to int.

    Returns:
        The result of ``original_image.crop((x1, y1, x2, y2))``.

    Raises:
        ValueError: If the box is empty, inverted, or reaches outside the
            image bounds.
    """
    left, top = (int(value) for value in top_left)
    right, bottom = (int(value) for value in bottom_right)

    fits_horizontally = 0 <= left < right <= original_image.width
    if not fits_horizontally or not (0 <= top < bottom <= original_image.height):
        raise ValueError("Invalid bounding box coordinates")

    return original_image.crop((left, top, right, bottom))
# Resize image to the target size with zero paddings.
def resize_img_and_pad(input_image, target_size):
    """Fit an image inside ``target_size`` and center it on a black canvas.

    The image is scaled uniformly (aspect ratio preserved) with bilinear
    resampling so that it fits inside the target, then pasted in the middle
    of a black RGB canvas of exactly ``target_size``.

    Args:
        input_image: Image exposing ``size`` and ``resize``.
        target_size: ``(width, height)`` of the output canvas.

    Returns:
        A new RGB image of size ``target_size``.
    """
    source_width, source_height = input_image.size
    target_width, target_height = target_size

    scale = min(target_width / source_width, target_height / source_height)
    # int() truncates, so a very elongated input could otherwise end up with
    # a zero-sized dimension, which resize() rejects. Keep at least 1 px.
    new_width = max(1, int(source_width * scale))
    new_height = max(1, int(source_height * scale))

    resized_image = input_image.resize((new_width, new_height), Image.BILINEAR)

    padded_image = Image.new("RGB", target_size, (0, 0, 0))
    left_padding = (target_width - new_width) // 2
    top_padding = (target_height - new_height) // 2
    padded_image.paste(resized_image, (left_padding, top_padding))
    return padded_image
def pil_to_np(pil_img):
    """Convert an image to an RGB NumPy array.

    The image is converted to ``'RGB'`` regardless of its original mode, so
    an alpha channel, if present, is not part of the result.

    Args:
        pil_img: Image exposing ``convert``.

    Returns:
        ``np.ndarray`` built from the RGB-converted image.
    """
    # The alpha channel was previously extracted for RGBA inputs but never
    # used; every mode goes through the same RGB conversion.
    return np.array(pil_img.convert('RGB'))
# Helper function considering the VAE compression factor.
def nearest_multiple_of_16(ref_height):
    """Round ``ref_height`` to the closest multiple of 16.

    A value exactly halfway between two multiples (remainder 8) is rounded
    down. Note that values of 8 or less therefore round to 0.

    Args:
        ref_height: Height to round.

    Returns:
        The multiple of 16 nearest to ``ref_height``.
    """
    whole_blocks, leftover = divmod(ref_height, 16)
    if leftover > 8:
        # Closer to the next multiple than to the previous one.
        whole_blocks += 1
    return whole_blocks * 16
# Resize to generate the reference context.
def generate_context_reference_image(reference_image_pil, img_width=512):
    """Resize a reference image to ``img_width`` with a 16-aligned height.

    The height is scaled by the same factor as the width, then rounded to
    the nearest multiple of 16 via ``nearest_multiple_of_16``.

    Args:
        reference_image_pil: Reference image; converted to an RGB array
            with ``pil_to_np``.
        img_width: Width of the output image in pixels.

    Returns:
        The resized reference as a new image of size
        ``(img_width, rounded_height)``.
    """
    reference_image_rgb = pil_to_np(reference_image_pil)
    ref_height, ref_width = reference_image_rgb.shape[:2]

    scaled_height = int((img_width / ref_width) * ref_height)
    # Very wide references scale to a height that rounds down to 0, and
    # cv2.resize rejects an empty output size; keep at least one 16 px row.
    target_height = max(16, nearest_multiple_of_16(scaled_height))

    reference_context = cv2.resize(reference_image_rgb, (img_width, target_height))
    return Image.fromarray(reference_context)