"""Extract text from an image file using the project's HTR modules."""

import logging
import os
import sys
import tempfile

import cv2
import numpy as np

# Get absolute path to project root (two directory levels above this file).
# It is computed once and put on sys.path *before* the project-local imports
# below, which rely on it to resolve `utils` and `HTR`.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_root not in sys.path:
    sys.path.append(project_root)

from utils import notification_queue, log_print  # noqa: E402

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Import HTR modules
from HTR.word import convert_image  # noqa: E402
from HTR.strike import struck_images  # noqa: E402
from HTR.hcr import text  # noqa: E402
from HTR.spell_and_gramer_check import spell_grammer  # noqa: E402

model_path = os.path.join(project_root, 'models', 'vit-base-beans')

# Log model path for debugging. `isdir` (rather than `exists`) keeps a stray
# regular file at that path from crashing the import inside `os.listdir`.
logger.info("Using model path: %s", model_path)
if not os.path.isdir(model_path):
    logger.error("Model directory not found at: %s", model_path)
else:
    files = os.listdir(model_path)
    logger.info("Found model files: %s", files)


def _is_empty(value):
    """Return True when *value* is None or has no elements.

    Behaves like ``not value`` for lists, tuples and strings, but does not
    raise ``ValueError`` for multi-element numpy arrays, whose truth value is
    ambiguous.
    """
    if value is None:
        return True
    try:
        return len(value) == 0
    except TypeError:
        # Unsized objects: fall back to ordinary truthiness.
        return not value


def _fail(error_msg):
    """Log *error_msg*, queue an error notification, and return ``""``."""
    log_print(error_msg, "ERROR")
    notification_queue.put({
        "type": "error",
        "message": error_msg
    })
    return ""


def preprocess_image(img):
    """Preprocess image to improve text detection.

    Converts the image to grayscale, applies inverted adaptive Gaussian
    thresholding, denoises the result and converts it back to three channels.
    An "info" notification is queued before each step.

    Args:
        img: Image array accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``
            (presumably a BGR image such as ``cv2.imread`` returns).

    Returns:
        The preprocessed image, or ``img`` unchanged if any step raises.
    """
    try:
        # Convert to grayscale
        notification_queue.put({
            "type": "info",
            "message": "Converting image to grayscale..."
        })
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Apply adaptive thresholding (block size 11, constant 2)
        notification_queue.put({
            "type": "info",
            "message": "Applying adaptive thresholding..."
        })
        binary = cv2.adaptiveThreshold(
            gray, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            11, 2
        )

        # Denoise
        notification_queue.put({
            "type": "info",
            "message": "Denoising image..."
        })
        denoised = cv2.fastNlMeansDenoising(binary)

        # Convert back to BGR
        return cv2.cvtColor(denoised, cv2.COLOR_GRAY2BGR)
    except Exception as e:
        # Deliberately best-effort: report the failure and hand back the
        # caller's image. The traceback is logged so the cause is not lost.
        logger.exception("Image preprocessing failed")
        error_msg = str(e)
        notification_queue.put({
            "type": "error",
            "message": f"Error in image preprocessing: {error_msg}"
        })
        return img


def extract_text_from_image(img_path: str) -> str:
    """Run the text-extraction pipeline on the image at *img_path*.

    Steps: read the image, split it into text regions (``convert_image``),
    process those regions (``struck_images``), recognise text (``text``) and
    pass the result through ``spell_grammer``. Progress is reported through
    ``log_print``; errors and the final result are also pushed onto
    ``notification_queue``.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The value returned by ``spell_grammer``, or ``""`` when the file is
        missing or unreadable, a stage yields nothing, or any exception is
        raised. This function does not propagate exceptions.
    """
    try:
        # Log start of text extraction
        log_print(f"Starting text extraction for image: {img_path}")

        # Ensure the image exists
        if not os.path.exists(img_path):
            return _fail(f"Image file not found: {img_path}")

        # Read the image; cv2.imread signals failure by returning None.
        log_print(f"Reading image: {os.path.basename(img_path)}")
        img = cv2.imread(img_path)
        if img is None:
            return _fail(f"Failed to read image: {img_path}")

        # Log image properties
        log_print(f"Image properties - Shape: {img.shape}, Type: {img.dtype}")

        # Process the image
        log_print("Converting image to text regions...")
        imgs = convert_image(img)
        if _is_empty(imgs):
            # Fall back to treating the whole image as a single region.
            log_print("No text regions detected, processing whole image...", "WARNING")
            imgs = [img]

        log_print(f"Found {len(imgs)} text regions")

        log_print("Processing text regions...")
        processed_images = struck_images(imgs)
        if _is_empty(processed_images):
            return _fail("No valid text regions after processing")

        log_print("Extracting text from regions...")
        t = text(processed_images)
        if not t:
            return _fail("No text could be extracted from image")

        log_print("Performing spell checking...")
        t = spell_grammer(t)

        log_print(f"Successfully extracted text: {t}")
        notification_queue.put({
            "type": "success",
            "message": "Text extraction complete",
            "data": {
                "extracted_text": t
            }
        })
        return t
    except Exception as e:
        # Top-level boundary: callers get "" rather than an exception, but
        # the traceback is logged so failures stay diagnosable.
        logger.exception("Text extraction failed for %s", img_path)
        return _fail(f"Error in text extraction: {str(e)}")


# Example: extract_text_from_image("ans_image/1.jpg")