# Inpaint4Drag - utils/drag.py
import spaces
import numpy as np
import cv2
import torch
from typing import Union
def contour_to_points_and_mask(contour: np.ndarray, image_shape: tuple) -> tuple[np.ndarray, np.ndarray]:
"""Convert a contour to a set of points and binary mask.
This function takes a contour and creates both a binary mask and a list of points
that lie within the contour. The points are represented in (x, y) coordinates.
Args:
contour (np.ndarray): Input contour of shape (N, 2) or (N, 1, 2) where N is
the number of points. Each point should be in (x, y) format.
image_shape (tuple): Shape of the output mask as (height, width).
Returns:
tuple:
- np.ndarray: Array of points in (x, y) format with shape (M, 2),
where M is the number of points inside the contour.
Returns empty array of shape (0, 2) if contour is empty.
- np.ndarray: Binary mask of shape image_shape where pixels inside
the contour are 255 and outside are 0.
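
    Example:
        A minimal sketch with illustrative values (a 4x4 square contour
        rasterized into a 6x6 mask):

            >>> contour = np.array([[1, 1], [4, 1], [4, 4], [1, 4]], dtype=np.int32)
            >>> points, mask = contour_to_points_and_mask(contour, (6, 6))
            >>> mask.shape, points.shape[1]
            ((6, 6), 2)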
"""
if len(contour) == 0:
return np.zeros((0, 2), dtype=np.int32), np.zeros(image_shape, dtype=np.uint8)
# Create empty mask and fill the contour in the mask
mask = np.zeros(image_shape, dtype=np.uint8)
cv2.drawContours(mask, [contour.reshape(-1, 1, 2)], -1, 255, cv2.FILLED)
# Get points inside contour (y, x) and convert to (x, y)
points = np.column_stack(np.where(mask)).astype(np.int32)[:, [1, 0]]
# Return empty array if no points found
if len(points) == 0:
points = np.zeros((0, 2), dtype=np.int32)
return points, mask
def find_control_points(
region_points: torch.Tensor,
source_control_points: torch.Tensor,
target_control_points: torch.Tensor,
distance_threshold: float = 1e-6
) -> tuple[torch.Tensor, torch.Tensor]:
"""Find control points that match points within a region.
This function identifies which control points lie within or very close to
the specified region points. It matches source control points to region points
and returns both source and corresponding target control points that satisfy
the distance threshold criterion.
Args:
region_points (torch.Tensor): Points defining a region, shape (N, 2).
Each point is in (x, y) format.
source_control_points (torch.Tensor): Source control points, shape (M, 2).
Each point is in (x, y) format.
target_control_points (torch.Tensor): Target control points, shape (M, 2).
Must have same first dimension as source_control_points.
distance_threshold (float, optional): Maximum distance for a point to be
considered matching. Defaults to 1e-6.
Returns:
tuple[torch.Tensor, torch.Tensor]:
- Matched source control points, shape (K, 2) where K ≤ M
- Corresponding target control points, shape (K, 2)
            If no matches are found or the inputs are empty, empty tensors of shape (0, 2) are returned.
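
    Example:
        A minimal sketch; only the control point at (1, 1) coincides with a
        region point, so a single source/target pair is returned:

            >>> region = torch.tensor([[1., 1.], [2., 2.]])
            >>> src = torch.tensor([[1., 1.], [9., 9.]])
            >>> tgt = torch.tensor([[3., 3.], [8., 8.]])
            >>> find_control_points(region, src, tgt)
            (tensor([[1., 1.]]), tensor([[3., 3.]]))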
"""
# Handle empty input cases
if len(region_points) == 0 or len(source_control_points) == 0:
return (
torch.zeros((0, 2), device=source_control_points.device),
torch.zeros((0, 2), device=target_control_points.device)
)
# Calculate pairwise distances between source control points and region points
distances = torch.cdist(source_control_points, region_points)
# Find points that are within threshold distance of any region point
min_distances = distances.min(dim=1)[0]
matching_indices = min_distances < distance_threshold
# Return matched pairs of control points
return source_control_points[matching_indices], target_control_points[matching_indices]
def interpolate_points_with_weighted_directions(
points: torch.Tensor,
reference_points: torch.Tensor,
direction_vectors: torch.Tensor,
max_reference_points: int = 100,
num_nearest_neighbors: int = 4,
eps: float = 1e-6
) -> torch.Tensor:
"""Interpolate points based on weighted directions from nearest reference points.
This function moves each point by a weighted combination of direction vectors.
The weights are determined by the inverse distances to the nearest reference points.
If there are too many reference points, they are subsampled for efficiency.
Args:
points (torch.Tensor): Points to interpolate, shape (N, 2) in (x, y) format
reference_points (torch.Tensor): Reference point locations, shape (M, 2)
direction_vectors (torch.Tensor): Direction vectors for each reference point,
shape (M, 2), must match reference_points first dimension
max_reference_points (int, optional): Maximum number of reference points to use.
If exceeded, points are subsampled. Defaults to 100.
num_nearest_neighbors (int, optional): Number of nearest neighbors to consider
for interpolation. Defaults to 4.
eps (float, optional): Small value to avoid division by zero. Defaults to 1e-6.
Returns:
torch.Tensor: Interpolated points with shape (N, 2). If input points or
references are empty, returns the input points unchanged.
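
    Example:
        A minimal sketch of the single-reference case, where every point is
        shifted by the lone direction vector:

            >>> pts = torch.tensor([[0., 0.], [2., 2.]])
            >>> ref = torch.tensor([[1., 1.]])
            >>> dirs = torch.tensor([[5., 0.]])
            >>> interpolate_points_with_weighted_directions(pts, ref, dirs)
            tensor([[5., 0.],
                    [7., 2.]])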
"""
# Handle empty input cases
if len(points) == 0 or len(reference_points) == 0:
return points
    # Handle single reference point case: every point is shifted by the lone
    # direction vector (rounded for consistency with the k-NN path below)
    if len(reference_points) == 1:
        return (points + direction_vectors).round().float()
# Subsample reference points if too many
if len(reference_points) > max_reference_points:
indices = torch.linspace(0, len(reference_points)-1, max_reference_points).long()
reference_points = reference_points[indices]
direction_vectors = direction_vectors[indices]
# Calculate distances to all reference points
distances = torch.cdist(points, reference_points)
# Find k nearest neighbors (k = min(num_nearest_neighbors, num_references))
k = min(num_nearest_neighbors, len(reference_points))
topk_distances, neighbor_indices = torch.topk(
distances,
k=k,
dim=1,
largest=False
)
# Calculate weights based on inverse distances
weights = 1.0 / (topk_distances + eps)
weights = weights / weights.sum(dim=1, keepdim=True)
# Get directions for nearest neighbors and compute weighted average
neighbor_directions = direction_vectors[neighbor_indices]
weighted_directions = (weights.unsqueeze(-1) * neighbor_directions).sum(dim=1)
# Apply weighted directions and round to nearest integer
interpolated_points = (points + weighted_directions).round().float()
return interpolated_points
def get_points_within_image_bounds(
points: torch.Tensor,
image_shape: tuple[int, int]
) -> torch.Tensor:
"""Create a boolean mask for points that lie within image boundaries.
Identifies which points from the input tensor fall within valid image coordinates.
Points are assumed to be in (x, y) format, while image_shape is in (height, width) format.
Args:
points (torch.Tensor): Points to check, shape (N, 2) in (x, y) format.
x coordinates correspond to width/columns
y coordinates correspond to height/rows
image_shape (tuple[int, int]): Image dimensions as (height, width).
Returns:
torch.Tensor: Boolean mask of shape (N,) where True indicates the point
is within bounds. Returns empty tensor of shape (0,) if input is empty.
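
    Example:
        A minimal sketch with one in-bounds and one out-of-bounds point for a
        5x8 (height x width) image:

            >>> pts = torch.tensor([[3., 2.], [10., 2.]])
            >>> get_points_within_image_bounds(pts, (5, 8))
            tensor([ True, False])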
"""
# Handle empty input case
if len(points) == 0:
return torch.zeros(0, dtype=torch.bool, device=points.device)
# Unpack image dimensions
height, width = image_shape
# Check both x and y coordinates are within bounds
x_in_bounds = (points[:, 0] >= 0) & (points[:, 0] < width)
y_in_bounds = (points[:, 1] >= 0) & (points[:, 1] < height)
# Combine conditions
valid_points_mask = x_in_bounds & y_in_bounds
return valid_points_mask
@spaces.GPU
def bi_warp(
region_mask: np.ndarray,
control_points: Union[np.ndarray, torch.Tensor],
kernel_size: int = 5
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
"""Generate corresponding source/target points and inpainting mask for masked regions.
Args:
region_mask: Binary mask defining regions of interest (2D array with 0s and 1s)
control_points: Alternating source and target control points. Shape (N*2, 2)
        kernel_size: Controls the dilation kernel size. Must be an odd number
            or 0; even values are rounded up to the next odd number. Target
            contours are drawn with thickness kernel_size and the inpainting
            mask is dilated with a kernel_size x kernel_size kernel (default: 5).
            Set to 0 to skip contour drawing and dilation.
Returns:
tuple containing:
- Source points (M, 2)
- Target points (M, 2)
- Inpainting mask combined with target contour mask
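
    Example:
        A minimal sketch (illustrative only) that drags a small square region
        one pixel to the right; runs on CPU when CUDA is unavailable:

            >>> mask = np.zeros((8, 8), dtype=np.uint8)
            >>> mask[2:5, 2:5] = 1
            >>> pts = np.array([[3, 3], [4, 3]])  # alternating source, target
            >>> src, tgt, inpaint = bi_warp(mask, pts, kernel_size=0)
            >>> inpaint.shape
            (8, 8)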
"""
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
image_shape = region_mask.shape
# Ensure kernel_size is odd or 0
kernel_size = max(0, kernel_size)
if kernel_size > 0 and kernel_size % 2 == 0:
kernel_size += 1
    # 1. Initialize tensors and masks (move control points to the working
    # device so later cdist calls do not mix CPU and GPU tensors)
    if not isinstance(control_points, torch.Tensor):
        control_points = torch.tensor(control_points, dtype=torch.float32, device=device)
    else:
        control_points = control_points.to(device=device, dtype=torch.float32)
    source_control_points = control_points[0::2]  # even indices: source points
    target_control_points = control_points[1::2]  # odd indices: target points
    combined_source_mask = np.zeros(image_shape, dtype=np.uint8)
    combined_target_mask = np.zeros(image_shape, dtype=np.uint8)
    region_mask_binary = (region_mask > 0).astype(np.uint8)
    contour_mask = np.zeros(image_shape, dtype=np.uint8)
    # 2. Process regions (cv2.findContours returns (contours, hierarchy) in OpenCV >= 4)
    contours = cv2.findContours(region_mask_binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
all_source_points = []
all_target_points = []
for contour in contours:
if len(contour) == 0:
continue
# 3. Get source region points and mask
source_contour = torch.from_numpy(contour[:, 0, :]).float().to(device)
source_region_points, source_mask = contour_to_points_and_mask(contour[:, 0, :], image_shape)
source_mask = (source_mask > 0).astype(np.uint8)
if len(source_region_points) == 0:
continue
source_region_points = torch.from_numpy(source_region_points).float().to(device)
# 4. Transform points
source, target = find_control_points(source_region_points, source_control_points, target_control_points)
if len(source) == 0:
continue
directions = target - source
target_contour = interpolate_points_with_weighted_directions(source_contour, source, directions)
interpolated_target = interpolate_points_with_weighted_directions(source_region_points, source, directions)
# 5. Get target region points and mask
target_region_points, target_mask = contour_to_points_and_mask(target_contour.cpu().int().numpy(), image_shape)
target_mask = (target_mask > 0).astype(np.uint8)
if len(target_region_points) == 0:
continue
        # Draw target contour (reshaped to OpenCV's (N, 1, 2) contour layout)
        target_contour_np = target_contour.cpu().int().numpy().reshape(-1, 1, 2)
        if kernel_size > 0:
            cv2.drawContours(contour_mask, [target_contour_np], -1, 1, kernel_size)
target_region = torch.from_numpy(target_region_points).float().to(device)
# 6. Apply reverse transformation
back_directions = source_region_points - interpolated_target
interpolated_source = interpolate_points_with_weighted_directions(target_region, interpolated_target, back_directions)
# 7. Filter valid points
valid_mask = get_points_within_image_bounds(interpolated_source, image_shape)
if valid_mask.any():
all_source_points.append(interpolated_source[valid_mask])
all_target_points.append(target_region[valid_mask])
combined_source_mask = np.logical_or(combined_source_mask, source_mask).astype(np.uint8)
combined_target_mask = np.logical_or(combined_target_mask, target_mask).astype(np.uint8)
# 8. Handle empty case
if not all_source_points:
return np.zeros((0, 2), dtype=np.int32), np.zeros((0, 2), dtype=np.int32), np.zeros(image_shape, dtype=np.uint8)
# 9. Finalize outputs
final_source = torch.cat(all_source_points).cpu().numpy().astype(np.int32)
final_target = torch.cat(all_target_points).cpu().numpy().astype(np.int32)
# Create and combine masks
inpaint_mask = np.logical_and(combined_source_mask, np.logical_not(combined_target_mask)).astype(np.uint8)
if kernel_size > 0:
kernel = np.ones((kernel_size, kernel_size), dtype=np.uint8)
inpaint_mask = cv2.dilate(inpaint_mask, kernel)
final_mask = np.logical_or(inpaint_mask, contour_mask).astype(np.uint8)
return final_source, final_target, final_mask
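

if __name__ == "__main__":
    # Minimal smoke test, a usage sketch rather than part of the pipeline:
    # drag a small square region one pixel to the right and report the shapes
    # of the returned correspondences and inpainting mask. The inputs below
    # are hypothetical; @spaces.GPU is a no-op outside a ZeroGPU Space.
    demo_mask = np.zeros((16, 16), dtype=np.uint8)
    demo_mask[4:9, 4:9] = 1
    demo_points = np.array([[6, 6], [7, 6]])  # alternating source, target
    src_pts, tgt_pts, inpaint = bi_warp(demo_mask, demo_points, kernel_size=3)
    print(src_pts.shape, tgt_pts.shape, inpaint.shape)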