import numpy as np
import cv2
# import matplotlib.pyplot as plt
import sys
import os
import tempfile
import logging
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
cordinates = []  # note: not referenced anywhere else in this module
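
# Perspective ("four point") transform: given four corner points in
# (top-left, top-right, bottom-right, bottom-left) order, warp that
# quadrilateral to a flat, top-down rectangle. Falls back to returning
# the input image unchanged on error.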
def four_point_transform(image, pts):
    try:
        rect = pts
        (tl, tr, br, bl) = rect

        # Compute the width of the new image
        widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
        widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
        maxWidth = max(int(widthA), int(widthB))

        # Compute the height of the new image
        heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
        heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
        maxHeight = max(int(heightA), int(heightB))

        dst = np.array([
            [0, 0],
            [maxWidth - 1, 0],
            [maxWidth - 1, maxHeight - 1],
            [0, maxHeight - 1]], dtype="float32")

        rect = np.array(rect, dtype="float32")
        M = cv2.getPerspectiveTransform(rect, dst)
        warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
        return warped
    except Exception as e:
        logger.error(f"Error in four_point_transform: {str(e)}")
        return image
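
# Shadow removal: for each channel, estimate the page background with a
# dilation followed by a median blur, take the inverted absolute difference
# against it, and normalize. Returns (difference image, normalized difference
# image); downstream code uses the normalized result.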
def remove_shadow(image):
    try:
        rgb_planes = cv2.split(image)
        result_planes = []
        result_norm_planes = []
        for plane in rgb_planes:
            dilated_img = cv2.dilate(plane, np.ones((7, 7), np.uint8))
            bg_img = cv2.medianBlur(dilated_img, 21)
            diff_img = 255 - cv2.absdiff(plane, bg_img)
            norm_img = cv2.normalize(diff_img, None, alpha=0, beta=255,
                                     norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
            result_planes.append(diff_img)
            result_norm_planes.append(norm_img)
        result = cv2.merge(result_planes)
        result_norm = cv2.merge(result_norm_planes)
        return result, result_norm
    except Exception as e:
        logger.error(f"Error in remove_shadow: {str(e)}")
        return image, image
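
# Line extraction: dilate the input mask with a wide horizontal kernel so the
# words of a row merge into one blob, then map each line contour back to the
# full-resolution binary image via (x_scaling, y_scaling) and mask it out.
# Returns a list of per-line images at the original resolution.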
def analise(image, binary_image1, x_scaling, y_scaling):
    try:
        line = []
        kernel = np.ones((1, 250), np.uint8)
        dilation = cv2.dilate(image, kernel, iterations=2)
        contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for i in reversed(contours):
            x, y, w, h = cv2.boundingRect(i)
            if cv2.contourArea(i) < 20 or h < 8:
                continue
            scaling_factor_in_y = 0.5
            scaling_factor_in_x = 0
            resized_contour = i.copy()
            resized_contour = i * [x_scaling, y_scaling]
            resized_contour = resized_contour.astype(int)
            final_image__ = np.zeros_like(binary_image1)
            cv2.drawContours(final_image__, [resized_contour], 0, (255), -1)
            kernel_dil = np.ones((3, 3), np.uint8)
            final_image__ = cv2.dilate(final_image__, kernel_dil, iterations=3)
            line_image_final = cv2.bitwise_and(final_image__, binary_image1)
            line.append(line_image_final)
        return line
    except Exception as e:
        logger.error(f"Error in analise: {str(e)}")
        return []
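
# Helper: stretch an image to double its height and erode it with a tall, thin
# kernel. Not called by convert_image; kept as a standalone utility.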
def image_resize_and_erosion(image):
    try:
        height, width = image.shape[:2]
        # Stretch the image to twice its original height
        height = int(2 * height)
        resized_image = cv2.resize(image, (width, height))
        kernel = np.ones((13, 1), np.uint8)
        erosion = cv2.erode(resized_image, kernel, iterations=1)
        return erosion
    except Exception as e:
        logger.error(f"Error in image_resize_and_erosion: {str(e)}")
        return image
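
# Module-level placeholders; convert_image below shadows these names with its
# own local variables, so they are effectively unused defaults.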
x_scaling = 0
y_scaling = 0
binary_image1 = 0
line = 0
line_length = 0
count = 0
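
# Full pipeline: shadow removal -> grayscale -> adaptive threshold (to build
# the full-resolution binary), resize, repeat thresholding and morphology,
# horizontal dilation to find text lines, analise() to map the lines back to
# the original resolution, then per-line word segmentation. Returns a list of
# cropped word images (dark ink on white), ordered left to right within each line.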
def convert_image(img):
    try:
        # Create temporary directory for processing
        temp_dir = tempfile.mkdtemp()
        images_dir = os.path.join(temp_dir, 'images')
        os.makedirs(images_dir, exist_ok=True)

        logger.info("Starting image conversion...")
        logger.info(f"Input image shape: {img.shape}")

        # Ensure image is in correct format
        if len(img.shape) == 2:  # If grayscale
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif img.shape[2] == 4:  # If RGBA
            img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
        img = img.astype(np.uint8)

        img_copy = np.copy(img)
        line_length = 250
        rect_image = img

        # Remove shadow
        logger.info("Removing shadow...")
        image1, image2_ = remove_shadow(rect_image)

        # Convert to grayscale
        logger.info("Converting to grayscale...")
        gray_ = cv2.cvtColor(image2_, cv2.COLOR_BGR2GRAY)

        # Convert to binary with adaptive thresholding
        logger.info("Converting to binary...")
        binary_image_ = cv2.adaptiveThreshold(
            gray_, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV, 11, 2
        )
        inverted_binary_image_ = 255 - binary_image_
        binary_image1 = np.copy(inverted_binary_image_)

        y_height, x_width = rect_image.shape[:2]
        logger.info(f"Image dimensions: {x_width}x{y_height}")

        # Resize image
        new_width = 500 * 5
        new_height = 705 * 5
        x_scaling = x_width / new_width
        y_scaling = y_height / new_height

        logger.info("Resizing image...")
        rect_image = cv2.resize(rect_image, (new_width, new_height),
                                interpolation=cv2.INTER_NEAREST)

        # Process resized image
        logger.info("Processing resized image...")
        image1, image2 = remove_shadow(rect_image)
        gray = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)
        binary_image = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV, 11, 2
        )
        inverted_binary_image = 255 - binary_image

        # Apply morphological operations
        kernel = np.ones((2, 2), np.uint8)
        erosion = cv2.erode(inverted_binary_image, kernel, iterations=1)
        dilation = cv2.dilate(erosion, kernel, iterations=1)

        new_image = np.copy(dilation)
        new_image = 255 - new_image

        # Find text regions
        kernel = np.ones((1, 250), np.uint8)
        dilation_1 = cv2.dilate(dilation, kernel, iterations=2)
        contours, _ = cv2.findContours(dilation_1, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)

        line = []
        logger.info(f"Found {len(contours)} contours")
        for i in reversed(contours):
            x, y, w, h = cv2.boundingRect(i)
            if cv2.contourArea(i) < 20 or h < 10:
                continue
            cv2.drawContours(new_image, [i], -1, (0), 2)
            final_image_ = np.zeros_like(binary_image)
            cv2.drawContours(final_image_, [i], 0, (255), -1)
            line_image = cv2.bitwise_and(final_image_, dilation)
            line.extend(analise(line_image, binary_image1, x_scaling, y_scaling))

        count = 0
        kernel1 = np.ones((8, 8), np.uint8)
        word__image = []

        logger.info(f"Processing {len(line)} lines")
        for line_image in line:
            dilation_2 = cv2.dilate(line_image, kernel1, iterations=2)
            contours1, _ = cv2.findContours(dilation_2, cv2.RETR_EXTERNAL,
                                            cv2.CHAIN_APPROX_SIMPLE)
            sorted_contours = sorted(contours1, key=lambda c: cv2.boundingRect(c)[0])
            for j in sorted_contours:
                x1, y1, w1, h1 = cv2.boundingRect(j)
                if w1 < 5 or h1 < 5:  # Skip very small regions
                    continue
                final_image = line_image[y1:y1+h1, x1:x1+w1]
                final_image = 255 - final_image
                word__image.append(final_image)
                count += 1

        logger.info(f"Extracted {count} words")
        return word__image
    except Exception as e:
        logger.error(f"Error in convert_image: {str(e)}")
        return []
# img = cv2.imread("ans_image/1.jpg")
# convert_image(img)
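
# Minimal usage sketch. Assumptions: the default path "ans_image/1.jpg" (taken
# from the commented example above) and the "extracted_words" output directory
# are placeholders -- adjust to your own layout.
if __name__ == "__main__":
    input_path = sys.argv[1] if len(sys.argv) > 1 else "ans_image/1.jpg"
    img = cv2.imread(input_path)
    if img is None:
        raise SystemExit(f"Could not read image: {input_path}")
    words = convert_image(img)
    out_dir = "extracted_words"  # hypothetical output directory
    os.makedirs(out_dir, exist_ok=True)
    for idx, word in enumerate(words):
        cv2.imwrite(os.path.join(out_dir, f"word_{idx:04d}.png"), word)
    print(f"Saved {len(words)} word crops to {out_dir}")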