# Spaces: Running  (Hugging Face Space status banner captured with the source listing)
| import torch | |
| import numpy as np | |
| from PIL import Image | |
| import cv2 | |
| from transformers import AutoImageProcessor, SegformerForSemanticSegmentation | |
| from imagehash import average_hash | |
def load_model():
    """Load the SegFormer-B0 processor/model pair fine-tuned on ADE20K at 512x512.

    Returns:
        tuple: (AutoImageProcessor, SegformerForSemanticSegmentation)
    """
    checkpoint = "nvidia/segformer-b0-finetuned-ade-512-512"
    processor = AutoImageProcessor.from_pretrained(checkpoint)
    model = SegformerForSemanticSegmentation.from_pretrained(checkpoint)
    return processor, model
def segment_person(image: Image.Image, processor, model):
    """Produce a soft 3-channel "person" mask for `image`.

    Runs SegFormer semantic segmentation, keeps the person class, then erodes
    and blurs the binary mask so composited edges look smoother.

    Args:
        image: input PIL image.
        processor: SegFormer image processor (see `load_model`).
        model: SegFormer segmentation model (see `load_model`).

    Returns:
        numpy.ndarray: float32 mask of shape (H, W, 3) with values in [0, 1].
    """
    batch = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        logits = model(**batch).logits
    # PIL reports size as (W, H); interpolate expects (H, W).
    full_res = torch.nn.functional.interpolate(
        logits,
        size=image.size[::-1],
        mode="bilinear",
        align_corners=False,
    )
    labels = full_res.argmax(dim=1)[0].cpu().numpy()
    person = np.where(labels == 12, 255, 0).astype(np.uint8)  # ADE20K class 12 = person
    # Clean mask: shrink ragged borders, then feather the edge.
    cleaned = cv2.erode(person, np.ones((7, 7), np.uint8), iterations=1)
    feathered = cv2.GaussianBlur(cleaned, (3, 3), sigmaX=0, sigmaY=0)
    soft = feathered.astype(np.float32) / 255.0
    # Replicate to 3 channels so the mask multiplies RGB images directly.
    return np.stack([soft, soft, soft], axis=-1)
def resize_image(image, size_percent):
    """Shrink `image` to `size_percent`% and center it on a black canvas.

    Args:
        image: numpy array (H, W, C); converted to RGB.
        size_percent: scale factor in percent (e.g. 99).

    Returns:
        PIL.Image.Image: RGB image of the ORIGINAL dimensions, with the scaled
        content centered on an opaque black background.
    """
    pil = Image.fromarray(image).convert("RGB")
    w, h = pil.size
    scaled_w = int(w * size_percent / 100)
    scaled_h = int(h * size_percent / 100)
    # Keep the original frame size; fill uncovered area with black.
    canvas = Image.new('RGB', (w, h), (0, 0, 0))
    offset = ((w - scaled_w) // 2, (h - scaled_h) // 2)
    canvas.paste(pil.resize((scaled_w, scaled_h)), offset)
    return canvas
# Check if two images are similar
def check_image_similarity(image1, image2):
    """Return True when the two arrays are perceptually similar.

    Compares average-hash fingerprints; a Hamming distance below 10 bits
    counts as "the same picture".
    """
    fingerprint_a = average_hash(Image.fromarray(image1))
    fingerprint_b = average_hash(Image.fromarray(image2))
    return (fingerprint_a - fingerprint_b) < 10
def split_stereo_image(image):
    """
    Splits an image into left and right halves for stereographic viewing.
    Args:
        image: PIL Image or numpy array
    Returns:
        tuple: (left_half, right_half) as numpy arrays — or, when the input is
        not already a side-by-side stereo pair, (original, 99%-scaled copy).
    """
    if isinstance(image, Image.Image):
        image = np.array(image)
    midpoint = image.shape[1] // 2
    left = image[:, :midpoint]
    right = image[:, midpoint:]
    # A genuine side-by-side stereo frame has nearly identical halves.
    if check_image_similarity(left, right):
        return left, right
    # Otherwise synthesize a near-duplicate partner for a mild 3D effect.
    return image, resize_image(image, 99)
def resize_image_to_width(person_img, background_img):
    """Scale `person_img` (aspect ratio preserved) to fit `background_img`.

    For a landscape background the person image's width is matched to the
    background width; otherwise its height is matched to the background height.

    Args:
        person_img: PIL Image or numpy array to rescale.
        background_img: numpy array whose shape[:2] = (H, W) sets the target.

    Returns:
        PIL.Image.Image: the rescaled person image.
    """
    arr = np.array(person_img)
    src_h, src_w = arr.shape[:2]
    bg_h, bg_w = background_img.shape[:2]
    if bg_w > bg_h:
        # Landscape background: match width, derive height from aspect ratio.
        width = bg_w
        height = int(width * src_h / src_w)
    else:
        # Portrait/square background: match height, derive width.
        height = bg_h
        width = int(height * src_w / src_h)
    # Original did resize -> np.array -> Image.fromarray in both branches;
    # the round-trip is redundant (pixels unchanged), so resize once and return.
    return Image.fromarray(arr).resize((width, height))
def resize_mask(person_size, mask):
    """Rescale a float [0, 1] mask by `person_size` percent.

    Args:
        person_size: scale factor in percent.
        mask: numpy array in [0, 1], 2-D or (H, W, 3).

    Returns:
        numpy.ndarray: float32 mask in [0, 1], always 3-channel (H', W', 3).
    """
    factor = person_size / 100.0
    src_h, src_w = mask.shape[:2]
    target = (int(src_w * factor), int(src_h * factor))
    # Round-trip through PIL for the resampling step.
    as_image = Image.fromarray((mask * 255).astype(np.uint8))
    rescaled = np.array(as_image.resize(target)).astype(np.float32) / 255.0
    # Restore the channel axis if PIL collapsed it to grayscale.
    if rescaled.ndim == 2:
        rescaled = np.stack([rescaled, rescaled, rescaled], axis=-1)
    return rescaled
def resize_images(image, person_size):
    """Scale `image` by `person_size` percent.

    Args:
        image: PIL Image or numpy array.
        person_size: scale factor in percent.

    Returns:
        numpy.ndarray: the rescaled image as an array.
    """
    arr = np.array(image)
    factor = person_size / 100.0
    src_h, src_w = arr.shape[:2]
    new_size = (int(src_w * factor), int(src_h * factor))
    rescaled = Image.fromarray(arr).resize(new_size)
    return np.array(rescaled)