|
import cv2 |
|
import os |
|
import tempfile |
|
import logging |
|
import numpy as np |
|
import sys |
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|
from utils import notification_queue, log_print |
|
|
|
|
|
logging.basicConfig(level=logging.INFO) |
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
from HTR.word import convert_image |
|
from HTR.strike import struck_images |
|
from HTR.hcr import text |
|
from HTR.spell_and_gramer_check import spell_grammer |
|
|
|
|
|
# Resolve the repository root (the parent of this file's directory) so the
# model path works regardless of the current working directory.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Local checkpoint directory for the ViT classifier.
# NOTE(review): the name 'vit-base-beans' suggests a fine-tuned
# google/vit-base checkpoint — confirm it matches what HTR.strike loads.
model_path = os.path.join(project_root, 'models', 'vit-base-beans')


logger.info(f"Using model path: {model_path}")
if not os.path.exists(model_path):
    # A missing model directory is logged but not fatal at import time;
    # whatever later loads the model will surface the actual failure.
    logger.error(f"Model directory not found at: {model_path}")
else:
    # Listing the files gives a quick sanity check in the startup log.
    files = os.listdir(model_path)
    logger.info(f"Found model files: {files}")
|
|
|
def preprocess_image(img):
    """Preprocess image to improve text detection"""
    try:
        notification_queue.put({
            "type": "info",
            "message": "Converting image to grayscale..."
        })
        grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        notification_queue.put({
            "type": "info",
            "message": "Applying adaptive thresholding..."
        })
        # Inverted binary: text becomes white on black, which suits the
        # downstream region detection.
        thresholded = cv2.adaptiveThreshold(
            grayscale,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            11,
            2,
        )

        notification_queue.put({
            "type": "info",
            "message": "Denoising image..."
        })
        cleaned = cv2.fastNlMeansDenoising(thresholded)

        # Hand back a 3-channel image so callers expecting BGR still work.
        return cv2.cvtColor(cleaned, cv2.COLOR_GRAY2BGR)
    except Exception as e:
        # Best-effort: report the failure and fall back to the raw image
        # so the pipeline can continue with the unprocessed input.
        notification_queue.put({
            "type": "error",
            "message": f"Error in image preprocessing: {str(e)}"
        })
        return img
|
|
|
def _notify_error(error_msg):
    """Log *error_msg* at ERROR level and push it onto the UI notification queue."""
    log_print(error_msg, "ERROR")
    notification_queue.put({
        "type": "error",
        "message": error_msg
    })


def extract_text_from_image(img_path):
    """Run the full HTR pipeline on one image file and return the text.

    Pipeline: read image -> segment into text regions (convert_image) ->
    filter/process regions (struck_images) -> recognize text (text) ->
    spell/grammar correction (spell_grammer).

    Args:
        img_path: Filesystem path to the input image.

    Returns:
        The corrected extracted text, or "" on any failure. Failures are
        logged and reported via notification_queue rather than raised.
    """
    try:
        log_print(f"Starting text extraction for image: {img_path}")

        if not os.path.exists(img_path):
            _notify_error(f"Image file not found: {img_path}")
            return ""

        log_print(f"Reading image: {os.path.basename(img_path)}")
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for unreadable/corrupt files
            # instead of raising.
            _notify_error(f"Failed to read image: {img_path}")
            return ""

        log_print(f"Image properties - Shape: {img.shape}, Type: {img.dtype}")

        log_print("Converting image to text regions...")
        imgs = convert_image(img)
        if not imgs:
            # Fall back to treating the whole page as a single region.
            log_print("No text regions detected, processing whole image...", "WARNING")
            imgs = [img]

        log_print(f"Found {len(imgs)} text regions")

        log_print("Processing text regions...")
        processed_images = struck_images(imgs)
        if not processed_images:
            _notify_error("No valid text regions after processing")
            return ""

        log_print("Extracting text from regions...")
        t = text(processed_images)
        if not t:
            _notify_error("No text could be extracted from image")
            return ""

        log_print("Performing spell checking...")
        t = spell_grammer(t)

        log_print(f"Successfully extracted text: {t}")
        notification_queue.put({
            "type": "success",
            "message": "Text extraction complete",
            "data": {
                "extracted_text": t
            }
        })
        return t

    except Exception as e:
        # Boundary handler: this function is the pipeline entry point,
        # so swallow and report rather than propagate to the caller.
        _notify_error(f"Error in text extraction: {str(e)}")
        return ""
|
|
|
|