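# NOTE: the next six lines are residue from the web file viewer this source
# was copied from (uploader avatar caption, commit message, commit hash,
# view links, file size). They are not Python and are kept only as comments.
# yamanavijayavardhan's picture
# update_new_new_new
# 2ad0600
# raw
# history blame
# 3.03 kB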
import cv2
import os
import tempfile
import logging
import numpy as np
# Configure the root logger at INFO level and create this module's logger.
# NOTE(review): this runs before the HTR imports below - presumably so that
# anything those modules log at import time is emitted; confirm before
# moving the imports above this block.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
from HTR.word import convert_image
from HTR.strike import struck_images
from HTR.hcr import text
from HTR.spell_and_gramer_check import spell_grammer
def preprocess_image(img, *, block_size=11, threshold_offset=2):
    """Binarize and denoise an image to improve text detection.

    The image is reduced to grayscale, binarized with an inverted Gaussian
    adaptive threshold, denoised with non-local means, and expanded back to
    three channels so downstream steps still receive a BGR image.

    Args:
        img: Image array as loaded by OpenCV. A three-channel BGR image is
            the expected input; a two-dimensional (already grayscale) array
            is used directly instead of failing the colour conversion.
        block_size: Neighbourhood size passed to ``cv2.adaptiveThreshold``.
            OpenCV requires an odd value greater than 1.
        threshold_offset: Constant ``C`` passed to ``cv2.adaptiveThreshold``
            (subtracted from the weighted neighbourhood mean).

    Returns:
        The preprocessed three-channel image, or ``img`` unchanged if any
        step raises (the failure is logged together with its traceback).
    """
    try:
        # COLOR_BGR2GRAY rejects single-channel input, so an image that is
        # already grayscale skips the conversion rather than aborting.
        if getattr(img, "ndim", None) == 2:
            gray = img
        else:
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # THRESH_BINARY_INV: pixels at or below the local threshold become
        # 255 and everything else becomes 0.
        binary = cv2.adaptiveThreshold(
            gray,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            block_size,
            threshold_offset,
        )

        denoised = cv2.fastNlMeansDenoising(binary)

        # Convert back to three channels (BGR).
        return cv2.cvtColor(denoised, cv2.COLOR_GRAY2BGR)
    except Exception as e:
        # Best effort: fall back to the untouched input instead of failing
        # the caller; logger.exception keeps the traceback for debugging.
        logger.exception("Error in preprocess_image: %s", e)
        return img
def extract_text_from_image(img_path):
    """Extract text from the image file at ``img_path``.

    Pipeline, in order: load the file with OpenCV, normalise it to a
    three-channel uint8 BGR array, run ``preprocess_image``, then pass the
    result through ``convert_image`` (text regions), ``struck_images``
    (strike processing), ``text`` (recognition) and ``spell_grammer``
    (spell/grammar correction).

    Args:
        img_path: Path of the image file to read.

    Returns:
        The corrected text produced by ``spell_grammer``, or an empty string
        when the file is missing or unreadable, when any stage yields an
        empty result, or when any stage raises (the error is logged with its
        traceback).
    """
    try:
        # Ensure the image exists so a missing file gets its own log message.
        if not os.path.exists(img_path):
            logger.error("Image file not found: %s", img_path)
            return ""

        # cv2.imread signals an unreadable file by returning None.
        img = cv2.imread(img_path)
        if img is None:
            logger.error("Failed to read image: %s", img_path)
            return ""

        logger.info("Processing image: %s", img_path)
        logger.info("Image shape: %s", img.shape)
        logger.info("Image dtype: %s", img.dtype)

        # Normalise to three-channel BGR. With the default imread flag the
        # array is already BGR, so these branches are defensive.
        if len(img.shape) == 2:  # grayscale
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif img.shape[2] == 4:
            # OpenCV loads alpha images in BGRA order, so BGRA2BGR (not
            # RGBA2BGR) drops the alpha channel without swapping red/blue.
            img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

        # Ensure image is in the 8-bit format the later steps are given.
        img = img.astype(np.uint8)

        logger.info("Preprocessing image...")
        img = preprocess_image(img)

        logger.info("Converting image...")
        imgs = convert_image(img)
        if not imgs:
            logger.error("No text regions detected in image")
            return ""
        logger.info("Found %s text regions", len(imgs))

        logger.info("Processing struck images...")
        images_path = struck_images(imgs)
        if not images_path:
            logger.error("No valid text regions after strike processing")
            return ""

        logger.info("Extracting text...")
        t = text(images_path)
        if not t:
            logger.error("No text extracted from image")
            return ""

        logger.info("Spell checking...")
        t = spell_grammer(t)
        logger.info("Extracted text: %s", t)
        return t
    except Exception as e:
        # Top-level boundary of the pipeline: callers receive "" on failure;
        # logger.exception records the traceback so the cause is not lost.
        logger.exception("Error in extract_text_from_image: %s", e)
        return ""
# Example usage: extract_text_from_image("ans_image/1.jpg")