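# NOTE: the three lines below are hosting-page residue, kept as comments so the module parses.
# jebin2's picture
# new flow
# 2353a2a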
from typing import List, Tuple
from pathlib import Path
from .config import Config
import numpy as np
import cv2
from skimage.morphology import skeletonize, remove_small_objects
from skimage.measure import label
from skimage import measure
from tqdm import tqdm
from PIL import Image
import numpy as np
from sklearn.cluster import KMeans
import math
class ImageProcessor:
"""Handles image preprocessing operations."""
def __init__(self, config: Config = None):
    """Create a processor bound to a configuration object.

    Args:
        config: Settings used by the processing steps. When a falsy value
            (such as ``None``) is given, a default ``Config()`` is built.
    """
    # Running counter consumed by get_output_path to number saved files.
    self.index = 0
    self.config = config if config else Config()
def get_output_path(self, output_folder, file_name):
    """Return the next numbered output path inside *output_folder*.

    Every call advances ``self.index`` by one and prefixes *file_name* with
    that counter, zero-padded to two digits (for example ``out/03_gray.jpg``).

    Args:
        output_folder: Directory part of the path.
        file_name: Base file name to be prefixed.

    Returns:
        str: ``"<output_folder>/<NN>_<file_name>"``.
    """
    self.index = self.index + 1
    return "{}/{:02d}_{}".format(output_folder, self.index, file_name)
def mask_text_regions(self, input_path, bboxes: List[List[int]], output_filename: str = "1_text_removed.jpg", color: Tuple[int, int, int] = (0, 0, 0)) -> str:
    """Mask text regions in the image to reduce panel extraction noise.

    Every bounding box is painted over with a filled rectangle of *color*
    and the result is written to ``<config.output_folder>/<output_filename>``.

    Args:
        input_path: Path of the image to read.
        bboxes: Boxes given as ``[x1, y1, x2, y2]``.
        output_filename: File name used inside the configured output folder.
        color: Fill colour handed to ``cv2.rectangle``.

    Returns:
        str: Path of the written image.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    image = cv2.imread(str(input_path))
    if image is None:
        raise FileNotFoundError(f"Could not load image: {input_path}")

    for bbox in bboxes:
        # cv2.rectangle wants plain integer points; boxes coming from a
        # detector may be floats or NumPy scalars, so normalise them here.
        x1, y1, x2, y2 = (int(round(float(v))) for v in bbox)
        cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness=-1)

    output_path = f'{self.config.output_folder}/{output_filename}'
    cv2.imwrite(output_path, image)
    return str(output_path)
def preprocess_image(self, processed_image_path) -> Tuple[str, str, str]:
    """Produce grayscale, edge and dilated-edge versions of an image.

    Args:
        processed_image_path: Path of the image to read.

    Returns:
        Tuple[str, str, str]: Paths of the saved grayscale image, the Canny
        edge map (saved as ``binary.jpg``) and the dilated edge map.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    source = cv2.imread(processed_image_path)
    if source is None:
        raise FileNotFoundError(f"Could not load image: {processed_image_path}")

    gray = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)
    # Light blur first so Canny reacts less to pixel noise.
    smoothed = cv2.GaussianBlur(gray, (3, 3), 0)
    edges = cv2.Canny(smoothed, threshold1=50, threshold2=150, apertureSize=3)
    # Two passes with a 5x5 rectangle thicken the detected edges.
    square = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    dilated = cv2.dilate(edges, square, iterations=2)

    # Paths are reserved in this order so the numeric prefixes stay stable.
    gray_path, binary_path, dilated_path = (
        str(self.get_output_path(self.config.output_folder, name))
        for name in ("gray.jpg", "binary.jpg", "dilated.jpg")
    )
    for path, picture in ((gray_path, gray), (binary_path, edges), (dilated_path, dilated)):
        cv2.imwrite(path, picture)

    return gray_path, binary_path, dilated_path
def invert_if_black_dominates(self, binary):
    """Threshold an image and invert it when black pixels outnumber white.

    Args:
        binary: Image array passed to ``cv2.threshold`` (cut at 127).

    Returns:
        tuple: ``(image, inverted)`` where *image* is the thresholded array,
        inverted if black dominated, and *inverted* tells whether the
        inversion happened.
    """
    thresholded = cv2.threshold(binary, 127, 255, cv2.THRESH_BINARY)[1]

    n_black = np.sum(thresholded == 0)
    n_white = np.sum(thresholded == 255)

    if n_black > n_white:
        print("πŸ”„ Inverting image because black > white")
        return cv2.bitwise_not(thresholded), n_black > n_white

    print("βœ… No inversion needed, white >= black")
    return thresholded, n_black > n_white
def group_colors(self, processed_image_path, num_clusters: int = 5, file_name="group_colors.jpg", output_folder=None) -> str:
    """
    Groups similar colors in an image using KMeans clustering.

    Every pixel is replaced by the centre of the cluster it was assigned to
    and the quantised picture is saved through ``get_output_path``.

    Args:
        processed_image_path (str): Path to the image to be color-grouped.
        num_clusters (int): Number of color clusters to form.
        file_name (str): Name of the output image file.
        output_folder (str): Optional output directory; defaults to the
            configured output folder.

    Returns:
        str: Path to the saved grouped-color image.
    """
    output_folder = output_folder or self.config.output_folder

    # The context manager releases the file handle once pixels are decoded.
    with Image.open(processed_image_path) as source:
        np_image = np.array(source.convert("RGB"))
    h, w = np_image.shape[:2]
    pixels = np_image.reshape(-1, 3)

    # Fixed random_state keeps the clustering reproducible between runs.
    kmeans = KMeans(n_clusters=num_clusters, random_state=42, n_init='auto')
    labels = kmeans.fit_predict(pixels)
    centers = kmeans.cluster_centers_.astype(np.uint8)

    # Replace pixels with their cluster center color
    clustered_pixels = centers[labels].reshape(h, w, 3)

    # OpenCV writes BGR; reverse the channel axis and hand over a contiguous
    # array instead of a negative-stride view.
    clustered_bgr = np.ascontiguousarray(clustered_pixels[:, :, ::-1])
    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, clustered_bgr)
    return str(output_path)
def thin_image_borders(self, processed_image_path: str, file_name="thin_border.jpg", output_folder=None) -> str:
    """
    Clean dilated image by thinning thick borders and removing hanging clusters.

    The image is edge-detected, the edges are skeletonized, connected
    skeleton pieces smaller than 150 pixels are dropped, and the result is
    saved as black lines on a white background.

    Args:
        processed_image_path: Path of the image to read.
        file_name: Name of the output image file.
        output_folder: Optional output directory; defaults to the configured
            output folder.

    Returns:
        str: Path of the saved image.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    img = cv2.imread(processed_image_path)
    if img is None:
        raise FileNotFoundError(f"Could not load image: {processed_image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Apply Gaussian blur to reduce noise before edge detection
    blurred = cv2.GaussianBlur(gray, (3, 3), 0)
    edges = cv2.Canny(blurred, threshold1=50, threshold2=150, apertureSize=3)

    # Canny marks edges with 255; skeletonize operates on a binary image,
    # so pass an explicit boolean mask rather than the 0/255 array.
    skeleton = skeletonize(edges > 0)

    # Remove small hanging clusters (8-connected components)
    labeled = label(skeleton, connectivity=2)
    cleaned = remove_small_objects(labeled, min_size=150)  # Adjust min_size for more/less pruning

    # Kept skeleton pixels become black (0) on a white (255) background
    result = np.where(cleaned > 0, 0, 255).astype(np.uint8)

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, result)
    return str(output_path)
def remove_dangling_lines(self, image_path, file_name="dangling_lines_removed.jpg", output_folder=None):
    """Drop small dark connected components from a line drawing.

    Pixels darker than 128 are treated as line pixels. 8-connected
    components with fewer than 500 pixels are removed and the remainder is
    saved as black lines on a white background.

    Args:
        image_path: Path of the image to read (loaded as grayscale).
        file_name: Name of the output image file.
        output_folder: Optional output directory; defaults to the configured
            output folder.

    Returns:
        Path of the saved image.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        raise FileNotFoundError(f"Could not load image: {image_path}")

    # Boolean mask: True where the picture is dark (lines / stray strokes)
    binary = gray < 128

    # Label connected components
    labeled = label(binary, connectivity=2)

    # Remove small connected components (dangling lines, fragments)
    cleaned = remove_small_objects(labeled, min_size=500)  # Adjust min_size as needed

    # Surviving line pixels -> 0 (black), everything else -> 255 (white)
    final_image = np.where(cleaned > 0, 0, 255).astype(np.uint8)

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, final_image)
    return output_path
def remove_diagonal_lines(self, image_path, file_name="remove_diagonal_lines.jpg", output_folder=None):
    """Keep only long horizontal and vertical strokes of a line drawing.

    Dark pixels are opened with a 1-pixel-high and a 1-pixel-wide kernel
    whose length is 1/30 of the larger image side; whatever survives either
    opening is redrawn in black on a white canvas.

    Args:
        image_path: Path of the image to read.
        file_name: Name of the output image file.
        output_folder: Optional output directory; defaults to the configured
            output folder.

    Returns:
        Path of the saved image.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not load image: {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Dark pixels become foreground (255)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

    # Clamp to 1: images smaller than 30 px would otherwise give a
    # zero-length structuring element.
    kernel_length = max(1, max(gray.shape[0], gray.shape[1]) // 30)

    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))

    # Opening keeps only runs at least as long as the kernel
    horizontal_lines = cv2.morphologyEx(binary, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
    vertical_lines = cv2.morphologyEx(binary, cv2.MORPH_OPEN, vertical_kernel, iterations=2)

    # Union of both masks (values are 0 or 255)
    rect_lines = cv2.bitwise_or(horizontal_lines, vertical_lines)

    # White background with black axis-aligned lines only
    result = np.full_like(gray, 255)
    result[rect_lines > 0] = 0

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, result)
    return output_path
def thick_black(self, image_path, thickness=20, file_name="thick_black.jpg", output_folder=None):
    """Grow the near-black areas of an image by a square kernel.

    Pixels with gray value of at most 10 are dilated with a
    ``thickness`` x ``thickness`` rectangle and the covered area is painted
    black on a copy of the original image.

    Args:
        image_path: Path of the image to read.
        thickness: Side length of the dilation kernel in pixels.
        file_name: Name of the output image file.
        output_folder: Optional output directory; defaults to the configured
            output folder.

    Returns:
        Path of the saved image.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not load image: {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Near-black pixels (<= 10) become foreground (255)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY_INV)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (thickness, thickness))
    dilated = cv2.dilate(binary, kernel, iterations=1)

    # Paint the grown mask black on a copy of the colour image
    result = img.copy()
    result[dilated == 255] = (0, 0, 0)

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, result)
    return output_path
def to_int_box(self, line):
    """Return an iterator of the coordinates in ``line[0]`` converted to int.

    Only the first element of *line* is read, so any detector output that
    wraps its coordinates one level deep can be passed in.
    """
    coordinates = line[0]
    return map(int, coordinates)
def remove_diagonal_lines_and_set_white(self, image_path, file_name="remove_diagonal_lines_and_set_white.jpg", output_folder=None):
    """Erase diagonal line segments by painting white boxes over them.

    Segments are found with OpenCV's line segment detector. Every segment
    whose direction is more than 10 degrees away from horizontal and from
    vertical has its bounding box (padded by 2 px) filled with white.

    Args:
        image_path: Path of the image to read.
        file_name: Name of the output image file.
        output_folder: Optional output directory; defaults to the configured
            output folder.

    Returns:
        Path of the saved image.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not load image: {image_path}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Line segment detector works directly on the grayscale image
    lsd = cv2.createLineSegmentDetector(0)
    lines = lsd.detect(gray)[0]

    output = image.copy()
    padding = 2

    for line in (lines if lines is not None else ()):
        x1, y1, x2, y2 = self.to_int_box(line)  # Convert float to int

        # abs() folds the direction into the range [0, 180] degrees
        angle = np.abs(np.arctan2(y2 - y1, x2 - x1) * 180.0 / np.pi)

        # Near-horizontal (about 0 or 180) and near-vertical (about 90) stay
        if angle < 10 or 80 < angle < 100 or angle > 170:
            continue

        # Draw white rectangle over the padded bounding box of the segment
        top_left = (min(x1, x2) - padding, min(y1, y2) - padding)
        bottom_right = (max(x1, x2) + padding, max(y1, y2) + padding)
        cv2.rectangle(output, top_left, bottom_right, (255, 255, 255), thickness=-1)

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, output)
    return output_path
def remove_small_regions(self, image_path, file_name="remove_small_regions.jpg", output_folder=None):
    """Remove connected dark regions judged too small to matter.

    Dark pixels are labelled into connected regions. A region is erased when
    its bounding box is below both configured size ratios, or when Hough
    line detection inside the region finds lines but none of them is short
    in both directions. A debug image with a coloured box around every
    removed region is saved next to the cleaned image.

    Args:
        image_path: Path of the image to read (loaded as grayscale).
        file_name: Name of the output image file; the debug image uses the
            same name prefixed with ``debug_``.
        output_folder: Optional output directory; defaults to the configured
            output folder.

    Returns:
        Path of the saved cleaned image.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # Check before any other use of the array: cvtColor fails on None.
    if img is None:
        raise FileNotFoundError(f"Could not load image: {image_path}")
    visual = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)  # For debugging with colored rectangles

    height_, width_ = img.shape
    width_limit = width_ * self.config.min_width_ratio
    height_limit = height_ * self.config.min_height_ratio

    # Threshold: make black = foreground
    _, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)

    labeled = measure.label(binary)
    clean_mask = np.copy(binary)

    for region in measure.regionprops(labeled):
        minr, minc, maxr, maxc = region.bbox
        width = maxc - minc
        height = maxr - minr
        box_is_small = width < width_limit and height < height_limit

        # Bounding box filter
        if box_is_small and (width / width_) < 0.9 and (height / height_) < 0.9:
            clean_mask[labeled == region.label] = 0  # Remove small region
            cv2.rectangle(visual, (minc, minr), (maxc, maxr), (0, 0, 255), 2)
            continue

        # Crop and analyze region for line segments
        region_crop = binary[minr:maxr, minc:maxc]
        edges = cv2.Canny(region_crop, 50, 150, apertureSize=3)
        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=30, minLineLength=10, maxLineGap=5)

        if lines is not None:
            # NOTE(review): nesting reconstructed from the original
            # "only runs if no 'break' occurred" comment - the region is
            # removed when no detected line is short in both directions.
            has_short_line = any(
                abs(y2 - y1) < height_limit and abs(x2 - x1) < width_limit
                for x1, y1, x2, y2 in (line[0] for line in lines)
            )
            if not has_short_line:
                clean_mask[labeled == region.label] = 0
                cv2.rectangle(visual, (minc, minr), (maxc, maxr), (0, 255, 255), 2)
        elif box_is_small:
            # No lines, remove region
            clean_mask[labeled == region.label] = 0
            cv2.rectangle(visual, (minc, minr), (maxc, maxr), (255, 0, 0), 2)

    # Save debug visualization
    output_path = self.get_output_path(output_folder, f"debug_{file_name}")
    cv2.imwrite(output_path, visual)

    # Invert back to original format: black lines on white
    cleaned = cv2.bitwise_not(clean_mask)
    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, cleaned)
    return output_path
def thin_black(self, image_path, file_name="thin_black.jpg", output_folder=None):
    """Thin the dark lines of an image and save them black-on-white.

    ``cv2.ximgproc.thinning`` is used when that attribute is available;
    otherwise a morphological skeleton is built by repeated erosion.

    Args:
        image_path: Path of the image to read (loaded as grayscale).
        file_name: Name of the output image file.
        output_folder: Optional output directory; defaults to the configured
            output folder.

    Returns:
        Path of the saved image.

    Raises:
        ValueError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError("Image not loaded. Check the file path.")

    # Dark pixels become foreground (255)
    binary = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)[1]

    try:
        # Provided by the contrib build of OpenCV
        thinned = cv2.ximgproc.thinning(binary)
    except AttributeError:
        # Fallback: peel the shape layer by layer and collect what each
        # opening would have lost.
        cross = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
        thinned = np.zeros(binary.shape, np.uint8)
        remaining = binary
        pixels_left = True
        while pixels_left:
            eroded = cv2.erode(remaining, cross)
            reopened = cv2.dilate(eroded, cross)
            layer = cv2.subtract(remaining, reopened)
            thinned = cv2.bitwise_or(thinned, layer)
            remaining = eroded
            pixels_left = cv2.countNonZero(remaining) != 0

    # Back to black lines on a white background
    thinned = 255 - thinned

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, thinned)
    return output_path
def thin_lines_direct(self, image_path, file_name="thin_lines_direct.jpg", output_folder=None):
    """Thin thick black lines with a row scan and a column scan.

    The row scan walks every horizontal run of black pixels, measures how
    far the black extends downwards from the middle of the run, and redraws
    the run on the bottom row of that extent. The column scan does the same
    with vertical runs and the rightmost column. A pixel is black in the
    saved image when either scan drew it.

    Args:
        image_path: Path of the image to read (loaded as grayscale).
        file_name: Name of the output image file.
        output_folder: Optional output directory; defaults to the configured
            output folder.

    Returns:
        Path of the saved image.

    Raises:
        ValueError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError("Could not load image")

    # 0 = black lines, 255 = white background
    binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)[1]
    n_rows, n_cols = binary.shape

    # ---- Pass 1: horizontal runs, keep the bottom row ----
    row_pass = np.full_like(binary, 255)
    print("Processing thick lines...")
    print("Step 1: Thinning horizontal segments...")
    for r in range(n_rows):
        c = 0
        while c < n_cols:
            if binary[r, c] != 0:
                c += 1
                continue
            # Walk to the end of this black run
            run_start = c
            while c < n_cols and binary[r, c] == 0:
                c += 1
            run_end = c - 1
            # Depth of the black area below the middle of the run
            probe_col = (run_start + run_end) // 2
            depth = self.get_vertical_thickness(binary, r, probe_col)
            if depth > 1:
                target_row = r + depth - 1
                if target_row < n_rows:
                    row_pass[target_row, run_start:run_end + 1] = 0
            else:
                # Already one pixel high: copy as is
                row_pass[r, run_start:run_end + 1] = 0

    # ---- Pass 2: vertical runs, keep the rightmost column ----
    print("Step 2: Thinning vertical segments...")
    col_pass = np.full_like(binary, 255)
    for c in range(n_cols):
        r = 0
        while r < n_rows:
            if binary[r, c] != 0:
                r += 1
                continue
            run_start = r
            while r < n_rows and binary[r, c] == 0:
                r += 1
            run_end = r - 1
            # Extent of the black area to the right of the middle of the run
            probe_row = (run_start + run_end) // 2
            span = self.get_horizontal_thickness(binary, probe_row, c)
            if span > 1:
                target_col = c + span - 1
                if target_col < n_cols:
                    col_pass[run_start:run_end + 1, target_col] = 0
            else:
                # Already one pixel wide: copy as is
                col_pass[run_start:run_end + 1, c] = 0

    # ---- Pass 3: AND of two white-background images keeps both line sets ----
    print("Step 3: Combining results...")
    final_result = cv2.bitwise_and(row_pass, col_pass)

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, final_result)
    return output_path
def get_vertical_thickness(self, binary, start_row, col):
    """Count consecutive zero-valued pixels going down a column.

    Counting begins at ``(start_row, col)`` and stops at the first non-zero
    pixel or at the bottom edge of *binary*.

    Returns:
        int: Length of the run; 0 when the starting pixel is not zero.
    """
    run = 0
    for r in range(start_row, binary.shape[0]):
        if binary[r, col] != 0:
            break
        run += 1
    return run
def get_horizontal_thickness(self, binary, row, start_col):
    """Count consecutive zero-valued pixels going right along a row.

    Counting begins at ``(row, start_col)`` and stops at the first non-zero
    pixel or at the right edge of *binary*.

    Returns:
        int: Length of the run; 0 when the starting pixel is not zero.
    """
    run = 0
    for c in range(start_col, binary.shape[1]):
        if binary[row, c] != 0:
            break
        run += 1
    return run
def remove_diagonal_only_cells(self, image_path, file_name="remove_diagonal_only_cells.jpg", output_folder=None):
    """Delete line pixels that are connected only through one diagonal.

    After thresholding (dark pixels become active), an active pixel is
    cleared when it has no active neighbour above, below, left or right and
    all of its active diagonal neighbours lie on a single diagonal: the
    top-left/bottom-right one or the top-right/bottom-left one. All
    decisions are taken on the unmodified input, so the whole test is done
    with array operations instead of a per-pixel loop.

    Args:
        image_path: Path of the image to read (loaded as grayscale).
        file_name: Name of the output image file.
        output_folder: Optional output directory; defaults to the configured
            output folder.

    Returns:
        Path of the saved image (black lines on white).

    Raises:
        ValueError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError("Unable to load the image. Check the file path.")

    # Dark pixels become active (255)
    _, binary = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)

    # One-pixel inactive border so every pixel has eight neighbours
    active = np.pad(binary == 255, pad_width=1, mode='constant', constant_values=False)

    centre = active[1:-1, 1:-1]
    top = active[:-2, 1:-1]
    bottom = active[2:, 1:-1]
    left = active[1:-1, :-2]
    right = active[1:-1, 2:]
    top_left = active[:-2, :-2]
    top_right = active[:-2, 2:]
    bottom_left = active[2:, :-2]
    bottom_right = active[2:, 2:]

    has_orthogonal = top | bottom | left | right
    on_main_diagonal = top_left | bottom_right
    on_anti_diagonal = top_right | bottom_left

    # Exactly one of the two diagonals is occupied (XOR), nothing else is.
    diagonal_only = centre & ~has_orthogonal & (on_main_diagonal ^ on_anti_diagonal)

    cleaned = binary.copy()
    cleaned[diagonal_only] = 0

    # Invert back to original style (black lines on white)
    result = cv2.bitwise_not(cleaned)

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, result)
    return output_path
def remove_small_continuity_components(
    self,
    image_path,
    file_name="remove_small_continuity_components.jpg",
    output_folder=None,
):
    """Erase 8-connected dark components whose bounding box is small.

    A component is removed when its bounding-box height is below
    ``image height * config.min_height_ratio`` and its width is below
    ``image width * config.min_width_ratio``. The selection is made with a
    per-label lookup table, so the image is scanned once rather than once
    per component. A debug image marking removed components in red is saved
    as well.

    Args:
        image_path: Path of the image to read (loaded as grayscale).
        file_name: Name of the output image file; the debug image replaces
            ``.jpg`` with ``_debug.jpg``.
        output_folder: Optional output directory; defaults to the configured
            output folder.

    Returns:
        Path of the saved cleaned image.

    Raises:
        ValueError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError("Unable to load the image. Check the file path.")

    height, width = img.shape
    min_height = height * self.config.min_height_ratio
    min_width = width * self.config.min_width_ratio

    # Dark pixels become foreground (255)
    _, binary = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)

    # Connected component labeling (8-connectivity)
    _, labels, stats, _ = cv2.connectedComponentsWithStats(binary, connectivity=8)

    # One flag per label: True when the component's bounding box is small
    is_small = (stats[:, cv2.CC_STAT_HEIGHT] < min_height) & (stats[:, cv2.CC_STAT_WIDTH] < min_width)
    is_small[0] = False  # label 0 is the background and is never removed

    # Look every pixel's label up in the flag table
    removal_mask = is_small[labels]

    cleaned_output = binary.copy()
    cleaned_output[removal_mask] = 0

    debug_output = cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)  # For visualizing removed components
    debug_output[removal_mask] = (0, 0, 255)  # Mark removed components in red

    # Invert back to original style
    final_result = cv2.bitwise_not(cleaned_output)

    output_path = self.get_output_path(output_folder, file_name)
    debug_path = self.get_output_path(output_folder, file_name.replace(".jpg", "_debug.jpg"))
    cv2.imwrite(output_path, final_result)
    cv2.imwrite(debug_path, debug_output)
    return output_path
def connect_horizontal_vertical_gaps(self, image_path, file_name='connected_output.jpg', output_folder=None):
    """Join collinear horizontal/vertical segments with thick black lines.

    Segments are detected with the probabilistic Hough transform, reduced to
    those within 5 degrees of horizontal or vertical, greedily merged with
    later segments of the same orientation that lie close to them, and the
    merged segments are drawn 20 px thick in black on a copy of the image.

    Args:
        image_path: Path of the image to read.
        file_name: Name of the output image file.
        output_folder: Optional output directory; defaults to the configured
            output folder.

    Returns:
        Path of the saved image.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not load image: {image_path}")
    height, width = image.shape[:2]

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # Detect all lines
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=50, minLineLength=30, maxLineGap=10)
    output = image.copy()

    # Keep only horizontal (about 0 or 180 degrees) and vertical (about 90
    # degrees) segments. abs() folds the angle into [0, 180], so a
    # horizontal segment stored right-to-left shows up near 180.
    filtered_lines = []
    if lines is not None:
        for line in lines:
            # Plain ints keep later arithmetic and cv2.line free of NumPy scalars
            x1, y1, x2, y2 = (int(v) for v in line[0])
            angle = abs(math.degrees(math.atan2(y2 - y1, x2 - x1)))
            if angle < 5 or angle > 175 or 85 < angle < 95:
                filtered_lines.append([x1, y1, x2, y2])

    # Merge similar lines
    merged_lines = []
    used = [False] * len(filtered_lines)

    horizontal_alignment_threshold = 5
    # NOTE(review): this width-based value limits the *vertical* offset
    # between two horizontal segments (and vice versa below) - confirm.
    horizontal_distance_threshold = width * self.config.min_width_ratio
    vertical_alignment_threshold = 5
    vertical_distance_threshold = height * self.config.min_height_ratio
    overlap_allowance = 10

    for i, (x1a, y1a, x2a, y2a) in enumerate(filtered_lines):
        if used[i]:
            continue
        used[i] = True
        merged = [x1a, y1a, x2a, y2a]
        a_is_horizontal = abs(y1a - y2a) < horizontal_alignment_threshold
        a_is_vertical = abs(x1a - x2a) < vertical_alignment_threshold

        for j in range(i + 1, len(filtered_lines)):
            if used[j]:
                continue
            x1b, y1b, x2b, y2b = filtered_lines[j]

            # Both horizontal and at a similar height
            if (a_is_horizontal
                    and abs(y1b - y2b) < horizontal_alignment_threshold
                    and abs(y1a - y1b) < horizontal_distance_threshold):
                # NOTE(review): joined with `or`, this test holds for any
                # two intervals, so segments merge whatever the gap between
                # them. Kept as is because the output depends on it.
                if max(x1a, x2a) >= min(x1b, x2b) - overlap_allowance or max(x1b, x2b) >= min(x1a, x2a) - overlap_allowance:
                    merged = [
                        min(merged[0], merged[2], x1b, x2b),
                        y1a,
                        max(merged[0], merged[2], x1b, x2b),
                        y1a,
                    ]
                    used[j] = True
            # Both vertical and at a similar horizontal position
            elif (a_is_vertical
                    and abs(x1b - x2b) < vertical_alignment_threshold
                    and abs(x1a - x1b) < vertical_distance_threshold):
                if max(y1a, y2a) >= min(y1b, y2b) - overlap_allowance or max(y1b, y2b) >= min(y1a, y2a) - overlap_allowance:
                    merged = [
                        x1a,
                        min(merged[1], merged[3], y1b, y2b),
                        x1a,
                        max(merged[1], merged[3], y1b, y2b),
                    ]
                    used[j] = True

        merged_lines.append(merged)

    # Draw merged lines
    for x1, y1, x2, y2 in merged_lines:
        cv2.line(output, (x1, y1), (x2, y2), (0, 0, 0), 20)

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, output)
    return output_path
def detect_small_objects_and_set_white(self, image_path, file_name="detect_small_objects_and_set_white.jpg", output_folder=None):
    """Paint small dark objects white.

    External contours of the dark areas are located; every contour whose
    bounding box is below both ``image height * config.min_height_ratio``
    and ``image width * config.min_width_ratio`` has that box filled with
    white on a copy of the image.

    Args:
        image_path: Path of the image to read.
        file_name: Name of the output image file.
        output_folder: Optional output directory; defaults to the configured
            output folder.

    Returns:
        Path of the saved image.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    output_folder = output_folder or self.config.output_folder

    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not load image: {image_path}")
    height, width = image.shape[:2]
    height_limit = height * self.config.min_height_ratio
    width_limit = width * self.config.min_width_ratio

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Dark pixels become foreground (255)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

    # findContours returns 2 values in OpenCV 4 and 3 in OpenCV 3; the
    # contour list is the second-to-last element in both.
    contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    output = image.copy()
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        if h < height_limit and w < width_limit:
            cv2.rectangle(output, (x, y), (x + w, y + h), (255, 255, 255), -1)

    output_path = self.get_output_path(output_folder, file_name)
    cv2.imwrite(output_path, output)
    return output_path