import numpy as np
import cv2
# import matplotlib.pyplot as plt
import sys
import os
import tempfile
import logging

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Global variables
TEMP_IMAGES_DIR = None
cordinates = []


def four_point_transform(image, pts):
    """Warp the quadrilateral defined by four corner points to a top-down view."""
    try:
        rect = pts
        (tl, tr, br, bl) = rect

        # Compute the width of the new image
        widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
        widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
        maxWidth = max(int(widthA), int(widthB))

        # Compute the height of the new image
        heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
        heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
        maxHeight = max(int(heightA), int(heightB))

        dst = np.array([
            [0, 0],
            [maxWidth - 1, 0],
            [maxWidth - 1, maxHeight - 1],
            [0, maxHeight - 1]], dtype="float32")

        rect = np.array(rect, dtype="float32")
        M = cv2.getPerspectiveTransform(rect, dst)
        warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
        return warped
    except Exception as e:
        logger.error(f"Error in four_point_transform: {str(e)}")
        return image


def remove_shadow(image):
    """Suppress soft shadows by estimating a blurred background per channel and subtracting it."""
    try:
        rgb_planes = cv2.split(image)
        result_planes = []
        result_norm_planes = []
        for plane in rgb_planes:
            dilated_img = cv2.dilate(plane, np.ones((7, 7), np.uint8))
            bg_img = cv2.medianBlur(dilated_img, 21)
            diff_img = 255 - cv2.absdiff(plane, bg_img)
            norm_img = cv2.normalize(diff_img, None, alpha=0, beta=255,
                                     norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
            result_planes.append(diff_img)
            result_norm_planes.append(norm_img)
        result = cv2.merge(result_planes)
        result_norm = cv2.merge(result_norm_planes)
        return result, result_norm
    except Exception as e:
        logger.error(f"Error in remove_shadow: {str(e)}")
        return image, image


def analise(image, binary_image1, x_scaling, y_scaling):
    """Group text into line regions and mask each line out of binary_image1.

    Contour coordinates found on `image` are scaled by (x_scaling, y_scaling)
    so they line up with the resolution of `binary_image1`.
    """
    try:
        line = []
        # A wide horizontal kernel merges words on the same line into one blob
        kernel = np.ones((1, 250), np.uint8)
        dilation = cv2.dilate(image, kernel, iterations=2)
        contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for i in reversed(contours):
            x, y, w, h = cv2.boundingRect(i)
            if cv2.contourArea(i) < 20 or h < 8:
                continue
            # Scale the contour to the coordinate system of binary_image1
            resized_contour = (i * [x_scaling, y_scaling]).astype(int)
            final_image__ = np.zeros_like(binary_image1)
            cv2.drawContours(final_image__, [resized_contour], 0, 255, -1)
            kernel_dil = np.ones((3, 3), np.uint8)
            final_image__ = cv2.dilate(final_image__, kernel_dil, iterations=3)
            line_image_final = cv2.bitwise_and(final_image__, binary_image1)
            line.append(line_image_final)
        return line
    except Exception as e:
        logger.error(f"Error in analise: {str(e)}")
        return []


def image_resize_and_erosion(image):
    """Stretch the image to double its height, then apply a vertical erosion."""
    try:
        height, width = image.shape[:2]
        height = int(2 * height)
        resized_image = cv2.resize(image, (width, height))
        kernel = np.ones((13, 1), np.uint8)
        erosion = cv2.erode(resized_image, kernel, iterations=1)
        return erosion
    except Exception as e:
        logger.error(f"Error in image_resize_and_erosion: {str(e)}")
        return image


x_scaling = 0
y_scaling = 0
binary_image1 = 0
line = 0
line_length = 0
count = 0


def convert_image(img):
    """Preprocess a page image, crop word-sized regions, and save them as PNGs.

    Returns the list of saved file paths; the output directory is also exposed
    via the global TEMP_IMAGES_DIR.
    """
    try:
        # Create temporary directory for processing
        temp_dir = tempfile.mkdtemp()
        images_dir = os.path.join(temp_dir, 'images')
        os.makedirs(images_dir, exist_ok=True)
        logger.info(f"Created temporary directory at: {images_dir}")

        # Save the temporary directory path globally for other functions to use
        global TEMP_IMAGES_DIR
        TEMP_IMAGES_DIR = images_dir

        logger.info("Starting image conversion...")
        logger.info(f"Input image shape: {img.shape}")

        # Ensure image is in correct format and size
        if len(img.shape) == 2:  # If grayscale
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif img.shape[2] == 4:  # If RGBA
            img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)

        # Resize if image is too small
        min_size = 800
        if img.shape[0] < min_size or img.shape[1] < min_size:
            scale = min_size / min(img.shape[0], img.shape[1])
            img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)

        img = img.astype(np.uint8)
        img_copy = np.copy(img)

        # Enhanced preprocessing
        # 1. Denoise
        img = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)

        # 2. Convert to grayscale
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # 3. Apply adaptive thresholding
        binary = cv2.adaptiveThreshold(
            gray, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            21, 15
        )

        # 4. Remove noise
        kernel = np.ones((3, 3), np.uint8)
        binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

        # 5. Dilate to connect text components
        binary = cv2.dilate(binary, kernel, iterations=1)

        # Find text regions
        contours, hierarchy = cv2.findContours(
            binary,
            cv2.RETR_EXTERNAL,
            cv2.CHAIN_APPROX_SIMPLE
        )

        # Filter contours by size
        valid_contours = []
        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            area = cv2.contourArea(cnt)
            if area > 100 and w > 10 and h > 10:  # Minimum size thresholds
                valid_contours.append(cnt)

        logger.info(f"Found {len(valid_contours)} valid text regions")

        # Process each text region
        word_images = []
        for i, cnt in enumerate(valid_contours):
            try:
                # Get bounding box
                x, y, w, h = cv2.boundingRect(cnt)

                # Add padding
                padding = 10
                x = max(0, x - padding)
                y = max(0, y - padding)
                w = min(img.shape[1] - x, w + 2 * padding)
                h = min(img.shape[0] - y, h + 2 * padding)

                # Extract region
                roi = gray[y:y + h, x:x + w]

                # Enhance contrast
                roi = cv2.equalizeHist(roi)

                # Binarize
                _, roi_bin = cv2.threshold(
                    roi, 0, 255,
                    cv2.THRESH_BINARY + cv2.THRESH_OTSU
                )

                # Save the word image
                word_path = os.path.join(images_dir, f'word_{i}.png')
                cv2.imwrite(word_path, roi_bin)
                word_images.append(word_path)
            except Exception as e:
                logger.error(f"Error processing contour {i}: {str(e)}")
                continue

        logger.info(f"Successfully extracted {len(word_images)} word images")
        return word_images
    except Exception as e:
        logger.error(f"Error in convert_image: {str(e)}")
        return []


# img = cv2.imread("ans_image/1.jpg")
# convert_image(img)
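

# Minimal usage sketch. The original only hints at this via the commented-out
# call above; chaining remove_shadow before convert_image is an assumption to
# show how the helpers can be combined, and the sample path "ans_image/1.jpg"
# is reused from that commented-out call.
if __name__ == "__main__":
    sample = cv2.imread("ans_image/1.jpg")
    if sample is None:
        logger.error("Could not read the sample image; adjust the path before running.")
    else:
        # Shadow-normalized copy of the page (optional preprocessing step)
        _, normalized = remove_shadow(sample)

        # Extract word crops and report where they were written
        paths = convert_image(normalized)
        logger.info(f"Word images written to: {TEMP_IMAGES_DIR}")
        for p in paths:
            logger.info(f"  {p}")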