File size: 4,831 Bytes
51c49bc
26f855a
 
2ad0600
 
2795ce6
 
54e2fee
2ad0600
 
 
 
51c49bc
237de6f
51c49bc
 
 
 
 
ef2032a
 
 
 
 
 
 
 
 
 
 
84b78a0
2ad0600
 
 
 
2795ce6
 
 
 
2ad0600
 
 
2795ce6
 
 
 
2ad0600
 
 
 
 
 
2795ce6
 
 
 
2ad0600
 
 
 
 
2795ce6
 
 
 
 
2ad0600
 
51c49bc
26f855a
c083a98
 
 
26f855a
 
c083a98
 
2795ce6
 
c083a98
2795ce6
2ad0600
26f855a
 
c083a98
26f855a
 
c083a98
 
2795ce6
 
c083a98
2795ce6
2ad0600
 
 
c083a98
2ad0600
26f855a
c083a98
26f855a
2ad0600
c083a98
3885e21
2ad0600
c083a98
2ad0600
c083a98
3885e21
 
c083a98
 
2795ce6
 
c083a98
2795ce6
2ad0600
 
c083a98
3885e21
2ad0600
c083a98
 
2795ce6
 
c083a98
2795ce6
2ad0600
 
c083a98
26f855a
 
c083a98
2795ce6
 
 
 
 
 
 
26f855a
2ad0600
26f855a
c083a98
 
2795ce6
 
c083a98
2795ce6
26f855a
51c49bc
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import cv2
import os
import tempfile
import logging
import numpy as np
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils import notification_queue, log_print

# Configure logging at INFO level when this module is imported, and create
# the module-level logger used by the model-path diagnostics further down.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Import HTR modules
from HTR.word import convert_image
from HTR.strike import struck_images
from HTR.hcr import text
from HTR.spell_and_gramer_check import spell_grammer

# Resolve the project root (two directory levels above this file) and the
# directory that is expected to hold the 'vit-base-beans' model files.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
model_path = os.path.join(project_root, 'models', 'vit-base-beans')

# Log the model location and its contents at import time for debugging.
logger.info(f"Using model path: {model_path}")
# isdir (not exists): a regular file at this path would make os.listdir raise
# NotADirectoryError and abort the import of this module.
if not os.path.isdir(model_path):
    logger.error(f"Model directory not found at: {model_path}")
else:
    files = os.listdir(model_path)
    logger.info(f"Found model files: {files}")

def preprocess_image(img):
    """Binarize and denoise an image ahead of text detection.

    The image is converted from BGR to grayscale, thresholded with an
    inverted Gaussian adaptive threshold (block size 11, constant 2),
    denoised with non-local means, and converted back to BGR. An "info"
    message is put on ``notification_queue`` before each step.

    Args:
        img: Image accepted by ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.

    Returns:
        The processed image converted back to BGR. If any step raises, an
        "error" notification is queued and the original ``img`` is returned
        unchanged.
    """
    def announce(kind, message):
        # Every progress/error notification shares the same two-key shape.
        notification_queue.put({"type": kind, "message": message})

    try:
        announce("info", "Converting image to grayscale...")
        grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        announce("info", "Applying adaptive thresholding...")
        thresholded = cv2.adaptiveThreshold(
            grayscale,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            11,
            2,
        )

        announce("info", "Denoising image...")
        cleaned = cv2.fastNlMeansDenoising(thresholded)

        # Hand back a 3-channel image again.
        return cv2.cvtColor(cleaned, cv2.COLOR_GRAY2BGR)
    except Exception as exc:
        # Best effort: report the failure and fall back to the input image.
        announce("error", f"Error in image preprocessing: {str(exc)}")
        return img

def extract_text_from_image(img_path):
    """Run the handwritten-text pipeline on a single image file.

    Steps, in order: check the path exists, read it with ``cv2.imread``,
    split it into regions with ``convert_image`` (falling back to the whole
    image when none are returned), pass the regions through
    ``struck_images``, recognize text with ``text``, and post-process the
    result with ``spell_grammer``. Progress is reported through
    ``log_print``; failures and the final result are also put on
    ``notification_queue``.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The value returned by ``spell_grammer`` on success, or an empty
        string if the file is missing or unreadable, a stage yields nothing,
        or any exception is raised along the way.
    """
    def fail(message):
        # Shared failure path: log, notify, and yield the empty result.
        log_print(message, "ERROR")
        notification_queue.put({"type": "error", "message": message})
        return ""

    try:
        log_print(f"Starting text extraction for image: {img_path}")

        # Guard: the file has to exist before we try to decode it.
        if not os.path.exists(img_path):
            return fail(f"Image file not found: {img_path}")

        log_print(f"Reading image: {os.path.basename(img_path)}")
        image = cv2.imread(img_path)
        # The code treats a None result from imread as a read failure.
        if image is None:
            return fail(f"Failed to read image: {img_path}")

        log_print(f"Image properties - Shape: {image.shape}, Type: {image.dtype}")

        log_print("Converting image to text regions...")
        regions = convert_image(image)
        if not regions:
            # Nothing segmented: treat the full image as a single region.
            log_print("No text regions detected, processing whole image...", "WARNING")
            regions = [image]

        log_print(f"Found {len(regions)} text regions")

        log_print("Processing text regions...")
        usable_regions = struck_images(regions)
        if not usable_regions:
            return fail("No valid text regions after processing")

        log_print("Extracting text from regions...")
        recognized = text(usable_regions)
        if not recognized:
            return fail("No text could be extracted from image")

        log_print("Performing spell checking...")
        corrected = spell_grammer(recognized)

        log_print(f"Successfully extracted text: {corrected}")
        success_notice = {
            "type": "success",
            "message": "Text extraction complete",
            "data": {"extracted_text": corrected},
        }
        notification_queue.put(success_notice)
        return corrected

    except Exception as exc:
        # Pipeline boundary: report any failure and return the empty result.
        return fail(f"Error in text extraction: {str(exc)}")

# extract_text_from_image("ans_image/1.jpg")