jebin2's picture
new ch
bb49e0d
from PIL import Image, ImageDraw
import imageio.v2 as imageio
import cv2
import numpy as np
from sklearn.cluster import KMeans
import os
import shutil
from glob import glob
from typing import List, Union
from .config import Config
from shapely.geometry import Polygon
def remove_duplicate_boxes(boxes, compare_single=None, iou_threshold=0.7):
    """
    Drop boxes that overlap an already-kept box too strongly, preferring the larger one.

    Two boxes are treated as duplicates when their intersection-over-union (IoU)
    is strictly greater than ``iou_threshold``.

    :param boxes: List of (x1, y1, x2, y2) boxes.
    :param compare_single: Optional single box to test against ``boxes`` instead
        of deduplicating ``boxes`` itself.
    :param iou_threshold: IoU above which two boxes count as duplicates.
    :return:
        - If compare_single is None: the deduplicated list of boxes.
        - If compare_single is provided: tuple (is_duplicate, box_or_none).
          ``box_or_none`` is ``compare_single`` when it has no duplicate or is
          strictly larger than the first duplicate found, otherwise ``None``.
    """

    def area_of(rect):
        return (rect[2] - rect[0]) * (rect[3] - rect[1])

    def overlap_ratio(first, second):
        overlap_w = max(0, min(first[2], second[2]) - max(first[0], second[0]))
        overlap_h = max(0, min(first[3], second[3]) - max(first[1], second[1]))
        shared = overlap_w * overlap_h
        if shared == 0:
            # Disjoint (or merely touching) boxes; also avoids dividing by a zero union.
            return 0.0
        return shared / float(area_of(first) + area_of(second) - shared)

    # --- Single comparison mode ---
    if compare_single is not None:
        candidate_area = area_of(compare_single)
        for known in boxes:
            if overlap_ratio(compare_single, known) > iou_threshold:
                # The first sufficiently overlapping box decides the outcome.
                if candidate_area > area_of(known):
                    return True, compare_single
                return True, None
        return False, compare_single

    # --- Bulk deduplication mode ---
    kept = []
    for candidate in boxes:
        candidate_area = area_of(candidate)
        for slot, survivor in enumerate(kept):
            if overlap_ratio(candidate, survivor) > iou_threshold:
                # Only the first overlapping survivor is considered: the larger
                # of the two keeps the slot, ties favour the existing box.
                if candidate_area > area_of(survivor):
                    kept[slot] = candidate
                break
        else:
            # No survivor overlapped enough, so this is a genuinely new box.
            kept.append(candidate)

    print(f"βœ… Found {abs(len(kept) - len(boxes))} duplicates")
    return kept
def count_panels_inside(target_box, other_boxes, height=None, width=None):
    """
    Count the boxes in ``other_boxes`` that lie entirely inside ``target_box``.

    Args:
        target_box: (x1, y1, x2, y2) container box.
        other_boxes: Iterable of (x1, y1, x2, y2) boxes to test. A box whose
            edges coincide with the target's edges still counts as inside.
        height: Optional. Only checked for ``None``; when both ``height`` and
            ``width`` are given the coverage check below is enabled.
        width: Optional. See ``height``.

    Returns:
        The number of contained boxes. When the coverage check is enabled, 0 is
        returned instead if the summed area of the contained boxes is less than
        80% of the target's area (or the target has no area). Areas are summed
        per box, so overlapping contained boxes are counted more than once.
    """
    x1a, y1a, x2a, y2a = target_box
    target_area = (x2a - x1a) * (y2a - y1a)

    count = 0
    total_covered_area = 0
    for x1b, y1b, x2b, y2b in other_boxes:
        if x1a <= x1b and y1a <= y1b and x2a >= x2b and y2a >= y2b:
            count += 1
            # Accumulate the contained area; without this the coverage check
            # below would always see 0 and reject every target.
            total_covered_area += (x2b - x1b) * (y2b - y1b)

    # Only apply the area threshold check if height and width are provided.
    if height is not None and width is not None:
        # A degenerate target cannot be meaningfully covered (and would divide by zero).
        if target_area <= 0 or total_covered_area / target_area < 0.8:
            return 0
    return count
def extend_boxes_to_image_border(boxes, image_shape, min_width_ratio, min_height_ratio, percent_threshold=0.8):
    """
    Snap box sides to the image border when they are close to it.

    A side is moved onto the border when its distance to that border is within
    the per-axis threshold, or when the box already spans at least
    ``percent_threshold`` of the image along that axis (in which case both
    sides on that axis are snapped).

    :param boxes: List of (x1, y1, x2, y2) tuples.
    :param image_shape: (width, height) of the image.
    :param min_width_ratio: Fraction of the image width used as the horizontal
        snapping distance.
    :param min_height_ratio: Fraction of the image height used as the vertical
        snapping distance.
    :param percent_threshold: Fraction of the image size above which a box is
        stretched to both borders along that axis.
    :return: List of adjusted (x1, y1, x2, y2) tuples; the input is returned
        as-is when it is empty.
    """
    if not boxes:
        return boxes

    width, height = image_shape
    width_threshold = width * min_width_ratio
    height_threshold = height * min_height_ratio

    adjusted_boxes = []
    for x1, y1, x2, y2 in boxes:
        # Decide on the ORIGINAL extents so snapping one side does not
        # influence the decision for the opposite side.
        spans_width = (x2 - x1) >= percent_threshold * width
        spans_height = (y2 - y1) >= percent_threshold * height

        # Snap if close to left or top
        if abs(x1) <= width_threshold or spans_width:
            x1 = 0
        if abs(y1) <= height_threshold or spans_height:
            y1 = 0

        # Snap if close to right or bottom
        if abs(x2 - width) <= width_threshold or spans_width:
            x2 = width
        if abs(y2 - height) <= height_threshold or spans_height:
            y2 = height

        adjusted_boxes.append((x1, y1, x2, y2))
    return adjusted_boxes
def draw_black(image_path, accepted_boxes, output_path, stripe = True) -> str:
    """
    Paint over the accepted boxes of an image and save the result.

    Inside every accepted box the original pixels are replaced by an overlay:
    alternating black/white horizontal bands when ``stripe`` is true, plain
    white otherwise.

    Args:
        image_path: Path of the image to read.
        accepted_boxes: Iterable of (x1, y1, x2, y2) regions to paint over.
        output_path: Where the modified image is written.
        stripe: Whether to draw the black bands onto the white overlay.

    Returns:
        ``output_path``.
    """
    canvas = Image.fromarray(imageio.imread(image_path))
    width, height = canvas.size
    band = 10

    # Full-size overlay: white background, with every other band painted black.
    overlay = Image.new("RGB", (width, height), (255, 255, 255))
    if stripe:
        painter = ImageDraw.Draw(overlay)
        for top in range(0, height, 2 * band):
            painter.rectangle([0, top, width, min(top + band, height)], fill=(0, 0, 0))

    # The mask is white only inside the accepted boxes, so the paste below
    # leaves everything else untouched.
    region_mask = Image.new("L", (width, height), 0)
    mask_painter = ImageDraw.Draw(region_mask)
    for left, top, right, bottom in accepted_boxes:
        mask_painter.rectangle([left, top, right, bottom], fill=255)

    canvas.paste(overlay, (0, 0), region_mask)
    canvas.save(output_path)
    return output_path
def extend_to_nearby_boxes(boxes, image_shape, min_width_ratio, min_height_ratio):
    """
    Grow each box up to its nearest neighbour (or the image edge) when the gap is small.

    Every box is evaluated against the ORIGINAL list, so an extension applied
    to one box never influences how another box is extended.

    Args:
        boxes: List of (x1, y1, x2, y2) boxes.
        image_shape: (width, height) of the image.
        min_width_ratio: Fraction of the image width; the largest horizontal
            gap that will be closed.
        min_height_ratio: Fraction of the image height; the largest vertical
            gap that will be closed.

    Returns:
        List of (x1, y1, x2, y2) tuples of ints, or the input itself when it is empty.
    """
    if not boxes:
        return boxes

    img_w, img_h = image_shape
    max_gap_x = img_w * min_width_ratio
    max_gap_y = img_h * min_height_ratio

    grown = []
    for idx, (left, top, right, bottom) in enumerate(boxes):
        # Without any neighbour on a side, the image edge is the limit.
        limit_left, limit_top = 0, 0
        limit_right, limit_bottom = img_w, img_h

        for other_idx, (o_left, o_top, o_right, o_bottom) in enumerate(boxes):
            if other_idx == idx:
                continue

            # Boxes sharing some rows can be left/right neighbours.
            if top < o_bottom and bottom > o_top:
                if o_left >= right:
                    limit_right = min(limit_right, o_left)
                if o_right <= left:
                    limit_left = max(limit_left, o_right)

            # Boxes sharing some columns can be top/bottom neighbours.
            if left < o_right and right > o_left:
                if o_top >= bottom:
                    limit_bottom = min(limit_bottom, o_top)
                if o_bottom <= top:
                    limit_top = max(limit_top, o_bottom)

        # Close a gap only when it exists (> 0) and is within the threshold.
        new_right = limit_right if 0 < (limit_right - right) <= max_gap_x else right
        new_left = limit_left if 0 < (left - limit_left) <= max_gap_x else left
        new_bottom = limit_bottom if 0 < (limit_bottom - bottom) <= max_gap_y else bottom
        new_top = limit_top if 0 < (top - limit_top) <= max_gap_y else top

        grown.append(tuple(int(value) for value in (new_left, new_top, new_right, new_bottom)))

    return grown
def convert_to_grayscale_pil(input_path, output_path):
    """
    Save a grayscale (PIL mode "L") copy of an image.

    Args:
        input_path: Path of the image to read.
        output_path: Path the grayscale image is written to.

    Returns:
        ``output_path``.
    """
    with Image.open(input_path) as source_image:
        source_image.convert("L").save(output_path)
    return output_path
def convert_to_clahe(input_path, output_path):
    """
    Write a CLAHE-equalized grayscale version of an image.

    Args:
        input_path: Path of the image to read with OpenCV.
        output_path: Path the processed image is written to.

    Returns:
        ``output_path``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``input_path``.
    """
    bgr = cv2.imread(input_path)
    if bgr is None:
        raise FileNotFoundError(f"Could not read image from path: {input_path}")

    # CLAHE operates on a single channel, so equalize the grayscale image.
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalized = equalizer.apply(cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY))

    cv2.imwrite(output_path, equalized)
    return output_path
def convert_to_lab_l(input_path, output_path):
    """
    Write the first (L) channel of the image's LAB conversion as a single-channel image.

    Args:
        input_path: Path of the image to read with OpenCV.
        output_path: Path the channel image is written to.

    Returns:
        ``output_path``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``input_path``.
    """
    bgr = cv2.imread(input_path)
    if bgr is None:
        raise FileNotFoundError(f"Could not read image from path: {input_path}")

    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    lightness = lab[:, :, 0]  # channel 0 of the LAB image

    cv2.imwrite(output_path, lightness)
    return output_path
def convert_to_group_colors(input_path, output_path, num_clusters: int = 5):
    """
    Quantize an image to ``num_clusters`` colors using KMeans and save it.

    Every pixel is replaced by the center of the color cluster it was assigned to.

    Args:
        input_path: Path of the image to read.
        output_path: Path the quantized image is written to.
        num_clusters: Number of color clusters (distinct output colors).

    Returns:
        ``output_path``.
    """
    # Use a context manager so the image file is released once the pixel data
    # has been copied into the array.
    with Image.open(input_path) as image:
        np_image = np.array(image.convert("RGB"))

    h, w = np_image.shape[:2]
    pixels = np_image.reshape(-1, 3)

    # Fixed random_state keeps the clustering reproducible between runs.
    kmeans = KMeans(n_clusters=num_clusters, random_state=42, n_init='auto')
    labels = kmeans.fit_predict(pixels)
    centers = kmeans.cluster_centers_.astype(np.uint8)

    # Replace pixels with their cluster center color
    clustered_pixels = centers[labels].reshape(h, w, 3)

    # OpenCV writes BGR, so reverse the RGB channel order before saving.
    output = clustered_pixels[:, :, ::-1]
    cv2.imwrite(output_path, output)
    return output_path
def get_black_white_ratio(image_path: str, threshold: int = 128) -> dict:
    """
    Binarize an image and report how much of it is black versus white.

    The image is loaded as grayscale and thresholded with ``cv2.THRESH_BINARY``;
    pixels that end up at 255 count as white, all others as black.

    Args:
        image_path: Path to the image file
        threshold: Threshold value for binarization

    Returns:
        Dictionary with the keys ``black_ratio``, ``white_ratio``,
        ``black_count``, ``white_count`` and ``total_pixels``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        raise FileNotFoundError(f"Image not found: {image_path}")

    # cv2.threshold returns (used_threshold, image); only the image is needed.
    binary = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)[1]

    n_total = binary.size
    n_white = np.count_nonzero(binary == 255)
    n_black = n_total - n_white

    stats = {}
    stats["black_ratio"] = n_black / n_total
    stats["white_ratio"] = n_white / n_total
    stats["black_count"] = n_black
    stats["white_count"] = n_white
    stats["total_pixels"] = n_total
    return stats
def box_covered_ratio(boxes, image_shape) -> float:
    """
    Calculate the ratio of area covered by boxes to the image area,
    accounting for overlapping boxes by using a mask.

    Boxes are treated as half-open pixel ranges ``[x1, x2) x [y1, y2)``, the
    same convention as slicing a mask with ``mask[y1:y2, x1:x2]``, so a box of
    width ``w`` covers exactly ``w`` columns. Parts of a box that fall outside
    the image are ignored.

    Args:
        boxes (List[Tuple[int, int, int, int]]): List of (x1, y1, x2, y2) boxes.
        image_shape (Tuple[int, int]): (width, height) of the image.

    Returns:
        float: Ratio between 0 and 1.
    """
    width, height = image_shape
    image_area = width * height
    if image_area == 0 or not boxes:
        return 0.0

    # Boolean coverage mask; overlapping boxes simply set the same cells again.
    covered = np.zeros((height, width), dtype=bool)
    for x1, y1, x2, y2 in boxes:
        # Accept corners in either order, then clamp to the image so negative
        # coordinates cannot wrap around as negative slice indices.
        left, right = sorted((x1, x2))
        top, bottom = sorted((y1, y2))
        left, top = max(left, 0), max(top, 0)
        right, bottom = min(right, width), min(bottom, height)
        if left < right and top < bottom:
            covered[top:bottom, left:right] = True

    return float(np.count_nonzero(covered)) / image_area
def find_similar_remaining_regions(boxes, image_shape, debug_image_path, w_t=0.25, h_t=0.25):
    """
    Find remaining regions not covered by original boxes that match any original box's
    width and height within a given threshold.

    The uncovered area is split into external contours; a contour's bounding
    rectangle is accepted when both its width and height are within the relative
    tolerances of at least one original box. A debug image is written to
    ``debug_image_path`` showing the original boxes and the accepted and
    rejected regions (drawn with different colors).

    Args:
        boxes (List[Tuple[int, int, int, int]]): Original (x1, y1, x2, y2) boxes.
        image_shape (Tuple[int, int]): (width, height) of the image.
        debug_image_path (str): Path to save debug image.
        w_t (float): Relative width tolerance (e.g., 0.1 = ±10%)
        h_t (float): Relative height tolerance (e.g., 0.1 = ±10%)

    Returns:
        List[Tuple[int, int, int, int]]: The accepted (x1, y1, x2, y2) regions.
        An empty list is returned (and no debug image is written) when
        ``boxes`` is empty.
    """
    # Nothing to compare against: bail out before doing any image work.
    if not boxes:
        return []

    width, height = image_shape

    # White = uncovered, black = covered by an original box.
    mask = np.ones((height, width), dtype=np.uint8) * 255
    for x1, y1, x2, y2 in boxes:
        mask[y1:y2, x1:x2] = 0
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Reference sizes to match against. Boxes without positive width and height
    # are skipped: they would divide by zero or match spuriously.
    reference_sizes = [
        (x2 - x1, y2 - y1)
        for x1, y1, x2, y2 in boxes
        if x2 > x1 and y2 > y1
    ]

    similar_boxes = []
    debug_img = np.full((height, width, 3), 255, dtype=np.uint8)

    # Draw original boxes in green
    for x1, y1, x2, y2 in boxes:
        cv2.rectangle(debug_img, (x1, y1), (x2, y2), (0, 255, 0), 10)

    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        matched = any(
            abs(w - bw) / bw <= w_t and abs(h - bh) / bh <= h_t
            for bw, bh in reference_sizes
        )
        if matched:
            similar_boxes.append((x, y, x + w, y + h))
            cv2.rectangle(debug_img, (x, y), (x + w, y + h), (255, 0, 0), 10)  # Blue: Accepted
        else:
            cv2.rectangle(debug_img, (x, y), (x + w, y + h), (0, 0, 255), 10)  # Red: Rejected

    cv2.imwrite(debug_image_path, debug_img)
    return similar_boxes
def get_remaining_areas(image_size, boxes):
    """
    Return bounding rectangles of the image regions that no box covers.

    The uncovered area is split into external contours and each contour is
    reported by its bounding rectangle.

    Args:
        image_size: (width, height) of the image.
        boxes: List of (x1, y1, x2, y2) rectangles.

    Returns:
        List of (x1, y1, x2, y2) rectangles, one per uncovered contour.
    """
    width, height = image_size

    # 255 marks pixels covered by a box; everything else stays 0.
    covered = np.zeros((height, width), dtype=np.uint8)
    for left, top, right, bottom in boxes:
        covered[top:bottom, left:right] = 255

    # Flip the mask so the uncovered pixels are the foreground for contour search.
    free = cv2.bitwise_not(covered)
    outlines, _ = cv2.findContours(free, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    return [
        (x, y, x + w, y + h)
        for x, y, w, h in (cv2.boundingRect(outline) for outline in outlines)
    ]
def is_valid_panel(
    image_size,
    boxes,
    min_width_ratio: float,
    min_height_ratio: float
):
    """
    Keep only the panels (boxes) that meet the minimum width and height.

    Despite the name, this does not return booleans: it returns the boxes that
    pass the size check, in their original order.

    Args:
        image_size: (width, height) of the image.
        boxes: List of (x1, y1, x2, y2) panel boxes.
        min_width_ratio: Minimum allowed width as a ratio of image width (e.g. 0.05).
        min_height_ratio: Minimum allowed height as a ratio of image height (e.g. 0.05).

    Returns:
        List of (x1, y1, x2, y2) tuples for the boxes whose width and height are
        both at least the respective minimum.
    """
    image_width, image_height = image_size
    min_width = image_width * min_width_ratio
    min_height = image_height * min_height_ratio

    return [
        (x1, y1, x2, y2)
        for x1, y1, x2, y2 in boxes
        if (x2 - x1) >= min_width and (y2 - y1) >= min_height
    ]
def get_abs_path(relative_path: str) -> str:
    """Return the absolute form of ``relative_path`` as computed by ``os.path.abspath``."""
    absolute = os.path.abspath(relative_path)
    return absolute
def get_image_paths(directories: Union[str, List[str]]) -> List[str]:
    """
    Get all image paths from given directories.

    Only files directly inside each directory are considered (no recursion),
    matched by the extensions in ``Config.SUPPORTED_EXTENSIONS``.

    Args:
        directories: Single directory path or list of directory paths

    Returns:
        List of image file paths without duplicates, in a deterministic order:
        directories in the order given, then extensions in configuration order,
        then sorted by path within each extension.
    """
    if isinstance(directories, str):
        directories = [directories]

    all_images = []
    for directory in directories:
        abs_dir = get_abs_path(directory)
        if not os.path.isdir(abs_dir):
            print(f"⚠️ Warning: Skipping non-directory {abs_dir}")
            continue

        # Support multiple image extensions
        for ext in Config.SUPPORTED_EXTENSIONS:
            pattern = os.path.join(abs_dir, f'*.{ext}')
            all_images.extend(sorted(glob(pattern)))

    # dict.fromkeys removes duplicates while keeping first-seen order; a plain
    # set would throw away the sorting done above.
    return list(dict.fromkeys(all_images))
def clean_directory(directory: str, create_if_not_exists: bool = True) -> None:
    """
    Delete ``directory`` with all its contents, optionally recreating it empty.

    Errors during deletion are ignored. When ``create_if_not_exists`` is true the
    directory exists and is empty afterwards (assuming the deletion succeeded).
    """
    shutil.rmtree(directory, ignore_errors=True)
    if not create_if_not_exists:
        return
    os.makedirs(directory, exist_ok=True)
def backup_file(source_path: str, backup_path: str) -> str:
    """
    Copy ``source_path`` to ``backup_path``, creating the target's parent directories.

    Returns:
        The absolute path the file was copied to.
    """
    destination = get_abs_path(backup_path)
    parent_dir = os.path.dirname(destination)
    os.makedirs(parent_dir, exist_ok=True)
    shutil.copy(source_path, destination)
    print(f"βœ… File backed up to: {destination}")
    return destination
def douglas_peucker_simplify(points, epsilon):
    """
    Simplify a polygon outline with shapely's ``simplify`` (Douglas-Peucker).

    Args:
        points: Sequence of (x, y) vertices describing the polygon.
        epsilon: Simplification tolerance passed to ``Polygon.simplify``.

    Returns:
        List of (x, y) vertices of the simplified exterior ring, without the
        repeated closing point. Inputs with fewer than three points cannot
        form a polygon and are returned unchanged (as a list).
    """
    # A polygon ring needs at least three distinct vertices; earlier filtering
    # steps can leave fewer, so pass such input through instead of raising.
    if len(points) < 3:
        return list(points)

    polygon = Polygon(points)
    simplified = polygon.simplify(epsilon, preserve_topology=True)
    return list(simplified.exterior.coords[:-1])  # Remove duplicate last point
def filter_close_points(points, min_distance=5.0):
    """
    Thin out a point sequence so kept neighbours are at least ``min_distance`` apart.

    The first point is always kept; each later point is kept only if its
    Euclidean distance to the most recently kept point is at least
    ``min_distance``. Sequences with fewer than two points are returned as-is.
    """
    if len(points) < 2:
        return points

    kept = [points[0]]
    for candidate in points[1:]:
        # Distance is measured to the last KEPT point, not the previous input point.
        gap = np.linalg.norm(np.array(candidate) - np.array(kept[-1]))
        if gap >= min_distance:
            kept.append(candidate)
    return kept
def remove_thin_extensions_morphological(annotation_points, kernel_size=5):
    """
    Remove thin protrusions from a polygon by eroding and then dilating its mask.

    The polygon is rasterized into a mask with a 10-pixel margin, eroded and
    dilated once with a ``kernel_size`` x ``kernel_size`` kernel, and the
    largest external contour of the result is returned.

    Args:
        annotation_points: Sequence of (x, y) polygon vertices.
        kernel_size: Side length of the square kernel.

    Returns:
        The largest remaining contour as a list of [x, y] points in the original
        coordinate system, or ``annotation_points`` unchanged when no contour
        is found.
    """
    pts = np.array(annotation_points)
    min_x, min_y = np.min(pts, axis=0).astype(int)
    max_x, max_y = np.max(pts, axis=0).astype(int)

    # Origin of the mask in image coordinates: the polygon's bounding box
    # shifted by the 10 px margin (hence +20 on each mask dimension).
    origin = [min_x - 10, min_y - 10]
    canvas = np.zeros((max_y - min_y + 20, max_x - min_x + 20), dtype=np.uint8)

    # Rasterize the polygon in mask-local coordinates.
    local_pts = (pts - origin).astype(np.int32)
    cv2.fillPoly(canvas, [local_pts], 255)

    # Erode to drop thin parts, then dilate to restore the main body.
    structuring = np.ones((kernel_size, kernel_size), np.uint8)
    shrunk = cv2.erode(canvas, structuring, iterations=1)
    restored = cv2.dilate(shrunk, structuring, iterations=1)

    outlines, _ = cv2.findContours(restored, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not outlines:
        return annotation_points

    # Keep only the biggest piece and map it back to image coordinates.
    biggest = max(outlines, key=cv2.contourArea)
    return (biggest.reshape(-1, 2) + origin).tolist()
def str_format(points_list):
    """
    Flatten a list of points into a space-separated coordinate string.

    Only the first two components of each point are used, each formatted with
    six decimal places. The resulting string is also printed.
    """
    # [(x1, y1), (x2, y2), ...] -> [x1, y1, x2, y2, ...]
    flat = [value for point in points_list for value in (point[0], point[1])]
    coords_str = ' '.join(f'{value:.6f}' for value in flat)
    print(coords_str)
    return coords_str
def array_format(coords_str):
    """
    Parse a whitespace-separated coordinate string into a list of (x, y) tuples.

    Consecutive values are paired up as floats. The resulting list is also
    printed. An odd number of values raises ``IndexError``.
    """
    values = [float(token) for token in coords_str.split()]

    pairs = []
    for start in range(0, len(values), 2):
        # Indexing (rather than zip) keeps the IndexError on a dangling value.
        pairs.append((values[start], values[start + 1]))

    print(pairs)
    return pairs
def normalize_segmentation(annotations, min_distance=8.0, epsilon=5.0, remove_extensions=True):
    """
    Complete normalization pipeline for segmentation points.

    Each annotation whose ``"type"`` is ``"segmentation"`` has its ``"points"``
    replaced by a cleaned-up outline; other annotations are passed through
    untouched. Annotations are modified in place and the same dict objects
    are returned.

    Args:
        annotations: Iterable of annotation dicts with a ``"type"`` key and,
            for segmentations, ``"points"`` as a list of ``{"x": ..., "y": ...}``.
        min_distance: Minimum spacing between consecutive kept points.
        epsilon: Tolerance for the polygon simplification step.
        remove_extensions: Whether to run the thin-extension removal step
            (with a kernel size of 7) before the other steps.

    Returns:
        List of the (possibly updated) annotations, in input order.
    """
    processed_annotations = []
    for annotation in annotations:
        if annotation["type"] == "segmentation":
            normalized_points = [(p["x"], p["y"]) for p in annotation["points"]]

            # Step 1: Remove thin extensions first (only if enabled)
            if remove_extensions:
                normalized_points = remove_thin_extensions_morphological(normalized_points, kernel_size=7)

            # Step 2: Filter out points too close together
            normalized_points = filter_close_points(normalized_points, min_distance)

            # Step 3: Apply Douglas-Peucker simplification
            normalized_points = douglas_peucker_simplify(normalized_points, epsilon)

            # Update annotation with normalized points
            annotation["points"] = [{"x": p[0], "y": p[1]} for p in normalized_points]

        processed_annotations.append(annotation)
    return processed_annotations