|
import cv2 |
|
import os |
|
import tempfile |
|
import logging |
|
import numpy as np |
|
|
|
|
|
# Configure the root logger when this module is imported so that INFO-level
# records are emitted. ``basicConfig`` is a no-op if the root logger already
# has handlers, so an application that configured logging earlier wins.
logging.basicConfig(level=logging.INFO)

# Module-level logger, named after this module, used by every function below.
logger = logging.getLogger(__name__)
|
|
|
from HTR.word import convert_image |
|
from HTR.strike import struck_images |
|
from HTR.hcr import text |
|
from HTR.spell_and_gramer_check import spell_grammer |
|
|
|
def preprocess_image(img):
    """Preprocess image to improve text detection.

    The image is converted from BGR to grayscale, binarised with an inverted
    Gaussian adaptive threshold (11x11 neighbourhood, constant 2), denoised
    with non-local means and converted back to a 3-channel image.

    Args:
        img: Image to process. It must be something ``cv2.cvtColor`` accepts
            with ``COLOR_BGR2GRAY``; the caller in this file passes a
            3-channel ``uint8`` array.

    Returns:
        The processed 3-channel image, or ``img`` itself, unchanged, if any
        step raises.
    """
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # THRESH_BINARY_INV sets pixels at or below the local threshold to
        # 255 and the rest to 0, i.e. the output polarity is inverted.
        binary = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV, 11, 2
        )

        denoised = cv2.fastNlMeansDenoising(binary)

        return cv2.cvtColor(denoised, cv2.COLOR_GRAY2BGR)
    except Exception as e:
        # Preprocessing is best-effort: fall back to the untouched input, but
        # keep the traceback in the log so the failure can be diagnosed.
        logger.exception("Error in preprocess_image: %s", e)
        return img
|
|
|
def extract_text_from_image(img_path):
    """Run the recognition pipeline on an image file and return its text.

    The file is read with OpenCV, normalised to a 3-channel ``uint8`` image,
    passed through ``preprocess_image`` and then, in order, through
    ``convert_image``, ``struck_images``, ``text`` and ``spell_grammer``.

    Args:
        img_path: Path of the image file to process.

    Returns:
        The value returned by ``spell_grammer`` on success. An empty string
        is returned when the path does not exist, the image cannot be read,
        any pipeline stage yields a falsy result, or any ``Exception`` is
        raised along the way (the error is logged, not propagated).
    """
    try:
        if not os.path.exists(img_path):
            logger.error("Image file not found: %s", img_path)
            return ""

        img = cv2.imread(img_path)
        if img is None:
            # imread signals an unreadable/unsupported file by returning None.
            logger.error("Failed to read image: %s", img_path)
            return ""

        logger.info("Processing image: %s", img_path)
        logger.info("Image shape: %s", img.shape)
        logger.info("Image dtype: %s", img.dtype)

        # Defensive normalisation to 3-channel BGR. With imread's default
        # flag the image is already 3-channel, so these branches only matter
        # if the read flag is ever changed. OpenCV stores channels as
        # B, G, R(, A), hence BGRA2BGR rather than RGBA2BGR.
        if img.ndim == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif img.shape[2] == 4:
            img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

        img = img.astype(np.uint8)

        logger.info("Preprocessing image...")
        img = preprocess_image(img)

        logger.info("Converting image...")
        imgs = convert_image(img)
        if not imgs:
            logger.error("No text regions detected in image")
            return ""

        logger.info("Found %s text regions", len(imgs))

        logger.info("Processing struck images...")
        images_path = struck_images(imgs)
        if not images_path:
            logger.error("No valid text regions after strike processing")
            return ""

        logger.info("Extracting text...")
        t = text(images_path)
        if not t:
            logger.error("No text extracted from image")
            return ""

        logger.info("Spell checking...")
        t = spell_grammer(t)

        logger.info("Extracted text: %s", t)
        return t

    except Exception as e:
        # Top-level boundary of the pipeline: callers get "" on any failure.
        # logger.exception records the traceback alongside the message.
        logger.exception("Error in extract_text_from_image: %s", e)
        return ""
|
|
|
|