import cv2
import os
import tempfile
import logging
import numpy as np
import sys

# Make the project root importable before pulling in project modules
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils import notification_queue, log_print

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Import HTR modules
from HTR.word import convert_image
from HTR.strike import struck_images
from HTR.hcr import text
from HTR.spell_and_gramer_check import spell_grammer

# Get absolute paths to the project root and the model directory
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
model_path = os.path.join(project_root, 'models', 'vit-base-beans')

# Log model path for debugging
logger.info(f"Using model path: {model_path}")
if not os.path.exists(model_path):
    logger.error(f"Model directory not found at: {model_path}")
else:
    files = os.listdir(model_path)
    logger.info(f"Found model files: {files}")
def preprocess_image(img):
    """Preprocess image to improve text detection"""
    try:
        # Convert to grayscale
        notification_queue.put({
            "type": "info",
            "message": "Converting image to grayscale..."
        })
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Apply adaptive thresholding
        notification_queue.put({
            "type": "info",
            "message": "Applying adaptive thresholding..."
        })
        binary = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV, 11, 2
        )

        # Denoise
        notification_queue.put({
            "type": "info",
            "message": "Denoising image..."
        })
        denoised = cv2.fastNlMeansDenoising(binary)

        # Convert back to BGR
        return cv2.cvtColor(denoised, cv2.COLOR_GRAY2BGR)
    except Exception as e:
        error_msg = str(e)
        notification_queue.put({
            "type": "error",
            "message": f"Error in image preprocessing: {error_msg}"
        })
        return img
def extract_text_from_image(img_path):
    """Run the HTR pipeline on an image file and return the extracted text."""
    try:
        # Log start of text extraction
        log_print(f"Starting text extraction for image: {img_path}")

        # Ensure the image exists
        if not os.path.exists(img_path):
            error_msg = f"Image file not found: {img_path}"
            log_print(error_msg, "ERROR")
            notification_queue.put({
                "type": "error",
                "message": error_msg
            })
            return ""

        # Read the image
        log_print(f"Reading image: {os.path.basename(img_path)}")
        img = cv2.imread(img_path)
        if img is None:
            error_msg = f"Failed to read image: {img_path}"
            log_print(error_msg, "ERROR")
            notification_queue.put({
                "type": "error",
                "message": error_msg
            })
            return ""

        # Log image properties
        log_print(f"Image properties - Shape: {img.shape}, Type: {img.dtype}")

        # Segment the image into individual text regions
        log_print("Converting image to text regions...")
        imgs = convert_image(img)
        if not imgs:
            log_print("No text regions detected, processing whole image...", "WARNING")
            imgs = [img]
        log_print(f"Found {len(imgs)} text regions")

        # Clean up the detected regions (e.g. struck-through words)
        log_print("Processing text regions...")
        processed_images = struck_images(imgs)
        if not processed_images:
            error_msg = "No valid text regions after processing"
            log_print(error_msg, "ERROR")
            notification_queue.put({
                "type": "error",
                "message": error_msg
            })
            return ""

        # Run handwriting recognition on the processed regions
        log_print("Extracting text from regions...")
        t = text(processed_images)
        if not t:
            error_msg = "No text could be extracted from image"
            log_print(error_msg, "ERROR")
            notification_queue.put({
                "type": "error",
                "message": error_msg
            })
            return ""

        # Apply spell and grammar correction to the raw recognition output
        log_print("Performing spell and grammar checking...")
        t = spell_grammer(t)

        log_print(f"Successfully extracted text: {t}")
        notification_queue.put({
            "type": "success",
            "message": "Text extraction complete",
            "data": {
                "extracted_text": t
            }
        })
        return t

    except Exception as e:
        error_msg = f"Error in text extraction: {str(e)}"
        log_print(error_msg, "ERROR")
        notification_queue.put({
            "type": "error",
            "message": error_msg
        })
        return ""
# extract_text_from_image("ans_image/1.jpg")
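
# A minimal manual-test sketch, not part of the pipeline above. It assumes an
# example scan exists at ans_image/1.jpg (the path hinted at in the commented-out
# call above); adjust the path for your setup. preprocess_image is not invoked by
# extract_text_from_image, so this sketch shows one way it could be applied first,
# via a temporary file.
if __name__ == "__main__":
    sample_path = "ans_image/1.jpg"  # assumed example path
    sample = cv2.imread(sample_path)
    if sample is None:
        log_print(f"Sample image not found: {sample_path}", "ERROR")
    else:
        cleaned = preprocess_image(sample)
        tmp = tempfile.NamedTemporaryFile(suffix=".jpg", delete=False)
        tmp.close()
        cv2.imwrite(tmp.name, cleaned)
        try:
            extracted = extract_text_from_image(tmp.name)
        finally:
            os.remove(tmp.name)
        log_print(f"Extracted text: {extracted}")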