import cv2
import os
import tempfile
import logging
import numpy as np

# Set up logging
# NOTE(review): logging is configured *before* the HTR imports, as in the
# original file. The order is preserved in case those modules log or configure
# logging at import time -- confirm before moving the imports to the top.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

from HTR.word import convert_image
from HTR.strike import struck_images
from HTR.hcr import text
from HTR.spell_and_gramer_check import spell_grammer


def preprocess_image(img: np.ndarray) -> np.ndarray:
    """Preprocess an image to improve text detection.

    The image is converted from BGR to grayscale, binarised with an inverted
    Gaussian adaptive threshold (block size 11, constant 2), denoised with
    non-local-means, and converted back to a 3-channel image.

    Args:
        img: Image array in BGR channel order.

    Returns:
        The preprocessed 3-channel image, or ``img`` unchanged if any step
        raises (the failure is logged, not propagated).
    """
    try:
        # Convert to grayscale
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Apply adaptive thresholding
        binary = cv2.adaptiveThreshold(
            gray,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            11,
            2,
        )

        # Denoise
        denoised = cv2.fastNlMeansDenoising(binary)

        # Convert back to BGR
        return cv2.cvtColor(denoised, cv2.COLOR_GRAY2BGR)
    except Exception as e:
        # Best effort: fall back to the unprocessed image, but keep the
        # traceback in the log so the failure can be diagnosed.
        logger.exception("Error in preprocess_image: %s", e)
        return img


def _ensure_bgr(img: np.ndarray) -> np.ndarray:
    """Return ``img`` as a 3-channel ``uint8`` array in BGR order."""
    if len(img.shape) == 2:  # Grayscale: no channel axis
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    elif img.shape[2] == 4:  # Four channels (colour + alpha)
        # OpenCV decodes images in B, G, R(, A) order, so the alpha channel
        # must be dropped with BGRA2BGR; RGBA2BGR would swap red and blue.
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

    # Ensure image is in correct format
    return img.astype(np.uint8)


def extract_text_from_image(img_path: str) -> str:
    """Run the recognition pipeline on an image file and return its text.

    Steps: read the image, normalise it to 3-channel ``uint8``, preprocess
    it, detect text regions with ``convert_image``, pass them through
    ``struck_images``, recognise text with ``text`` and post-process the
    result with ``spell_grammer``.

    If ``convert_image`` returns nothing, the whole preprocessed image is
    written to a temporary PNG and used as the single region.

    Args:
        img_path: Path of the image file to process.

    Returns:
        The text returned by ``spell_grammer``, or ``""`` when the file is
        missing or unreadable, no usable regions or text are found, or any
        step raises. Errors are logged, never propagated.
    """
    try:
        # Ensure the image exists
        if not os.path.exists(img_path):
            logger.error(f"Image file not found: {img_path}")
            return ""

        # Read the image
        img = cv2.imread(img_path)
        if img is None:
            logger.error(f"Failed to read image: {img_path}")
            return ""

        # Log image properties
        logger.info(f"Processing image: {img_path}")
        logger.info(f"Image shape: {img.shape}")
        logger.info(f"Image dtype: {img.dtype}")

        # Normalise channel layout and dtype. With the default imread flag
        # OpenCV is documented to return 3-channel BGR already, so this acts
        # as a safeguard should the read flags ever change.
        img = _ensure_bgr(img)

        # Preprocess image
        logger.info("Preprocessing image...")
        img = preprocess_image(img)

        # Process the image
        logger.info("Converting image...")
        imgs = convert_image(img)

        if not imgs:
            logger.error("No text regions detected in image")
            # Try processing the whole image as one region.
            # NOTE(review): the path is fixed, so concurrent calls share it.
            temp_path = os.path.join(tempfile.gettempdir(), 'whole_image.png')
            if not cv2.imwrite(temp_path, img):
                # Without this check a failed write would leave a missing or
                # stale file from an earlier call to be recognised instead.
                logger.error(f"Failed to write fallback image: {temp_path}")
                return ""
            imgs = [temp_path]

        logger.info(f"Found {len(imgs)} text regions")

        logger.info("Processing struck images...")
        images_path = struck_images(imgs)

        if not images_path:
            logger.error("No valid text regions after strike processing")
            return ""

        logger.info("Extracting text...")
        t = text(images_path)

        if not t:
            logger.error("No text extracted from image")
            return ""

        logger.info("Spell checking...")
        t = spell_grammer(t)

        logger.info(f"Extracted text: {t}")
        return t

    except Exception as e:
        # Top-level boundary: callers get "" on any failure, but the
        # traceback is kept in the log.
        logger.exception("Error in extract_text_from_image: %s", e)
        return ""


# Example usage:
# extract_text_from_image("ans_image/1.jpg")