File size: 4,831 Bytes
51c49bc 26f855a 2ad0600 2795ce6 54e2fee 2ad0600 51c49bc 237de6f 51c49bc ef2032a 84b78a0 2ad0600 2795ce6 2ad0600 2795ce6 2ad0600 2795ce6 2ad0600 2795ce6 2ad0600 51c49bc 26f855a c083a98 26f855a c083a98 2795ce6 c083a98 2795ce6 2ad0600 26f855a c083a98 26f855a c083a98 2795ce6 c083a98 2795ce6 2ad0600 c083a98 2ad0600 26f855a c083a98 26f855a 2ad0600 c083a98 3885e21 2ad0600 c083a98 2ad0600 c083a98 3885e21 c083a98 2795ce6 c083a98 2795ce6 2ad0600 c083a98 3885e21 2ad0600 c083a98 2795ce6 c083a98 2795ce6 2ad0600 c083a98 26f855a c083a98 2795ce6 26f855a 2ad0600 26f855a c083a98 2795ce6 c083a98 2795ce6 26f855a 51c49bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import cv2
import os
import tempfile
import logging
import numpy as np
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils import notification_queue, log_print
# Set up logging: configure the root logger at INFO level as an import-time
# side effect, then create the logger used by this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Import HTR modules
from HTR.word import convert_image
from HTR.strike import struck_images
from HTR.hcr import text
from HTR.spell_and_gramer_check import spell_grammer
# Resolve the project root (the parent of the directory containing this file)
# and the model directory expected beneath it.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
model_path = os.path.join(project_root, 'models', 'vit-base-beans')

# Log the model location at import time for debugging.
logger.info("Using model path: %s", model_path)

# Use isdir() rather than exists(): a regular file at this path would pass an
# exists() check and then make os.listdir() raise while the module is being
# imported.
if not os.path.isdir(model_path):
    logger.error("Model directory not found at: %s", model_path)
else:
    files = os.listdir(model_path)
    logger.info("Found model files: %s", files)
def preprocess_image(img):
    """Preprocess image to improve text detection.

    The image is converted from BGR to grayscale, binarised with an inverted
    Gaussian adaptive threshold (block size 11, constant 2), denoised with
    OpenCV's non-local-means filter and finally converted back to BGR. An
    "info" notification is queued before each stage.

    Args:
        img: Image array that ``cv2.cvtColor`` can convert with
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The preprocessed image in BGR layout. If any stage raises, an "error"
        notification describing the exception is queued and the original
        ``img`` is returned unchanged.
    """
    def announce(kind, message):
        # Progress and error events share the same two-key payload.
        notification_queue.put({"type": kind, "message": message})

    try:
        announce("info", "Converting image to grayscale...")
        grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        announce("info", "Applying adaptive thresholding...")
        thresholded = cv2.adaptiveThreshold(
            grayscale,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            11,
            2,
        )

        announce("info", "Denoising image...")
        cleaned = cv2.fastNlMeansDenoising(thresholded)

        # Restore a 3-channel BGR layout for the return value.
        return cv2.cvtColor(cleaned, cv2.COLOR_GRAY2BGR)
    except Exception as exc:
        # Best effort: report the failure and hand back the untouched input.
        announce("error", f"Error in image preprocessing: {exc!s}")
        return img
def extract_text_from_image(img_path):
    """Extract text from the image file at ``img_path``.

    The image is read with ``cv2.imread``, split into regions by
    ``convert_image`` (falling back to the whole image when no regions are
    returned), passed through ``struck_images``, recognised by ``text`` and
    finally corrected by ``spell_grammer``. Progress is reported through
    ``log_print``; the final outcome is also pushed to ``notification_queue``.

    Args:
        img_path: Filesystem path of the image to process.

    Returns:
        The corrected text on success. An empty string when the file is
        missing or unreadable, when no regions survive processing, when no
        text is recognised, or when any step raises an exception.
    """
    def fail(message):
        # Every failure path logs at ERROR level, queues an "error"
        # notification and yields an empty string to the caller.
        log_print(message, "ERROR")
        notification_queue.put({"type": "error", "message": message})
        return ""

    try:
        log_print(f"Starting text extraction for image: {img_path}")

        # Guard: the file must exist before we try to decode it.
        if not os.path.exists(img_path):
            return fail(f"Image file not found: {img_path}")

        log_print(f"Reading image: {os.path.basename(img_path)}")
        img = cv2.imread(img_path)
        # Guard: cv2.imread signals an unreadable file by returning None.
        if img is None:
            return fail(f"Failed to read image: {img_path}")

        log_print(f"Image properties - Shape: {img.shape}, Type: {img.dtype}")

        log_print("Converting image to text regions...")
        regions = convert_image(img)
        if not regions:
            # Nothing was segmented, so treat the full image as one region.
            log_print("No text regions detected, processing whole image...", "WARNING")
            regions = [img]
        log_print(f"Found {len(regions)} text regions")

        log_print("Processing text regions...")
        usable_regions = struck_images(regions)
        if not usable_regions:
            return fail("No valid text regions after processing")

        log_print("Extracting text from regions...")
        recognised = text(usable_regions)
        if not recognised:
            return fail("No text could be extracted from image")

        log_print("Performing spell checking...")
        recognised = spell_grammer(recognised)

        log_print(f"Successfully extracted text: {recognised}")
        notification_queue.put({
            "type": "success",
            "message": "Text extraction complete",
            "data": {"extracted_text": recognised},
        })
        return recognised
    except Exception as exc:
        # Any unexpected error is reported like the other failures.
        return fail(f"Error in text extraction: {exc!s}")
# extract_text_from_image("ans_image/1.jpg") |