|
import gradio as gr |
|
import cv2 |
|
import numpy as np |
|
from PIL import Image |
|
import pickle |
|
from tensorflow.keras.models import load_model |
|
from tensorflow.keras.preprocessing.image import img_to_array |
|
import easyocr |
|
|
|
|
|
# File names of the Keras classifier and its pickled label mapping; both are
# relative, so they are resolved against the current working directory.
model_path = "MobileNetBest_Model.h5"
label_path = "MobileNet_Label_Encoder.pkl"

# Load the classifier once at import time; classify_text_region() reuses it.
model = load_model(model_path)
print("Model loaded.")
|
|
|
|
|
# Build the class-index -> label-name lookup used to decode multi-class
# predictions.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only ship a label file that comes from a trusted source.
try:
    with open(label_path, "rb") as f:
        label_map = pickle.load(f)
    # The pickled object is expected to be a dict of label -> index; invert it.
    index_to_label = {v: k for k, v in label_map.items()}
except Exception as exc:
    # Best-effort fallback: a missing, unreadable or unexpectedly shaped label
    # file must not stop the app. Catching Exception (instead of a bare
    # except) lets KeyboardInterrupt/SystemExit propagate, and the real cause
    # is reported rather than always claiming the file was "not found".
    index_to_label = {0: "Handwritten", 1: "Computerized"}
    print(
        f"Label encoder could not be loaded ({exc!r}). Using default:",
        index_to_label,
    )
else:
    print("Label encoder loaded:", index_to_label)
|
|
|
|
|
# Shared English EasyOCR reader, built once at import time and reused by every
# detection call.
# NOTE(review): the message below is printed unconditionally; it does not
# verify that a GPU is actually being used.
reader = easyocr.Reader(lang_list=['en'], gpu=True)
print("EasyOCR Reader initialized with GPU.")
|
|
|
|
|
def classify_text_region(region_img):
    """Classify one cropped text region as handwritten or computerized.

    The crop is resized to 224x224, converted to float32, divided by 255 and
    run through the module-level ``model`` as a batch of one.

    Args:
        region_img: Image crop as a NumPy array.

    Returns:
        For a single-output model: "Computerized" when the score is above
        0.5, otherwise "Handwritten". For a multi-output model: the
        ``index_to_label`` entry of the arg-max class. "Unknown" when the
        crop is missing/empty, the class index has no label, or any step
        raises.
    """
    # Nothing to classify; skip the model call instead of failing inside cv2.
    if region_img is None or getattr(region_img, "size", 0) == 0:
        return "Unknown"

    try:
        region_img = cv2.resize(region_img, (224, 224))
        region_img = region_img.astype("float32") / 255.0
        region_img = img_to_array(region_img)
        region_img = np.expand_dims(region_img, axis=0)  # add batch axis

        # verbose=0: this runs once per detected region, so the default
        # per-call progress output would flood the log.
        preds = model.predict(region_img, verbose=0)

        if preds.shape[-1] == 1:
            # NOTE(review): this branch hard-codes the label names and does
            # not consult index_to_label - confirm that score > 0.5 really
            # means "Computerized" for the trained model.
            return "Computerized" if preds[0][0] > 0.5 else "Handwritten"

        # Plain int so the lookup does not depend on NumPy scalar hashing.
        class_idx = int(np.argmax(preds[0]))
        return index_to_label.get(class_idx, "Unknown")
    except Exception as e:
        # Best-effort: one bad region must not abort the whole image.
        print("Classification error:", e)
        return "Unknown"
|
|
|
|
|
def AnnotatedTextDetection_EasyOCR_from_array(img):
    """Detect text with EasyOCR, classify each region and draw the results.

    Only the first 50 detections are considered; detections with confidence
    below 0.3 or with blank text are skipped.

    Args:
        img: Image as a NumPy array (rows x columns x channels). Boxes and
            labels are drawn onto this array in place.

    Returns:
        A tuple ``(annotated, summary)``. ``annotated`` is ``img`` after
        drawing, passed through ``cv2.COLOR_BGR2RGB`` (first and third
        channels swapped). ``summary`` holds one ``"<text> → <label>"`` line
        per accepted detection, joined with newlines.
    """
    results = reader.readtext(img)

    # Crop from an untouched copy so rectangles and label text drawn for
    # earlier detections never end up in the pixels given to the classifier.
    pristine = img.copy()
    height, width = img.shape[:2]
    annotated_results = []

    for bbox, text, conf in results[:50]:
        text = text.strip()
        if conf < 0.3 or not text:
            continue

        # bbox is a sequence of corner points. Use the bounding rectangle of
        # all of them (not just corners 0 and 2) and clamp it to the image,
        # so rotated or slightly out-of-bounds boxes still give a valid crop
        # instead of an empty or wrapped-around slice.
        xs = [int(point[0]) for point in bbox]
        ys = [int(point[1]) for point in bbox]
        x1, x2 = max(min(xs), 0), min(max(xs), width)
        y1, y2 = max(min(ys), 0), min(max(ys), height)

        crop = pristine[y1:y2, x1:x2]
        if crop.size == 0:
            continue

        label = classify_text_region(crop)
        annotated_results.append(f"{text} → {label}")

        # (0, 255, 0) for "Computerized", (255, 0, 0) for everything else.
        color = (0, 255, 0) if label == "Computerized" else (255, 0, 0)
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
        # Keep the label baseline inside the image for boxes near the top edge.
        label_y = max(y1 - 10, 15)
        cv2.putText(img, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 1)

    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB), "\n".join(annotated_results)
|
|
|
|
|
def infer(image):
    """Gradio callback: annotate an uploaded image and summarise its text.

    Args:
        image: The uploaded picture as a PIL image (or anything ``np.array``
            accepts), or ``None`` when nothing was uploaded.

    Returns:
        ``(annotated, summary)`` where ``annotated`` is a PIL image with the
        detected regions drawn on it and ``summary`` is the text returned by
        ``AnnotatedTextDetection_EasyOCR_from_array``. When ``image`` is
        ``None`` the result is ``(None, "No image provided.")``.
    """
    if image is None:
        # Submitting without an upload used to crash on img.shape[0].
        return None, "No image provided."

    if isinstance(image, Image.Image):
        # Grayscale/palette/RGBA inputs would break the 3-channel colour
        # conversion done by the annotation helper.
        image = image.convert("RGB")
    img = np.array(image)

    # Downscale so the longest side is at most max_dim pixels.
    max_dim = 1000
    height, width = img.shape[:2]
    longest = max(height, width)
    if longest > max_dim:
        scale = max_dim / longest
        # max(1, ...) keeps extreme aspect ratios from rounding a side to 0,
        # which cv2.resize rejects.
        new_size = (max(1, int(width * scale)), max(1, int(height * scale)))
        img = cv2.resize(img, new_size)

    annotated_img, result_text = AnnotatedTextDetection_EasyOCR_from_array(img)

    # The helper finishes with a BGR->RGB channel swap, but the array built
    # from the PIL image above is already RGB, so its output has red and blue
    # exchanged. Swap them back before handing the result to PIL. Doing it
    # here leaves the pixels seen by OCR and the classifier unchanged.
    annotated_img = cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB)
    return Image.fromarray(annotated_img), result_text
|
|
|
|
|
# Stylesheet handed to gr.Interface through its ``css`` argument: page
# background, a bordered rounded container, and bordered input/output panels.
custom_css = """
body {
    background-color: #e6f2ff;
}
.gradio-container {
    border-radius: 12px;
    padding: 20px;
    border: 2px solid #007acc;
}
.gr-input, .gr-output {
    border: 1px solid #007acc;
    border-radius: 10px;
}
"""
|
|
|
|
|
# Gradio UI: one uploaded image in; the annotated image and a text summary out.
demo = gr.Interface(
    fn=infer,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[
        gr.Image(type="pil", label="Annotated Image"),
        gr.Textbox(label="Detected Text and Classification"),
    ],
    title="Text Detection and Classification",
    description="This application detects text using EasyOCR and classifies each text region as Handwritten or Computerized using a MobileNet model.",
    theme="soft",
    css=custom_css,
)

# Start the web server only when this file is run as a script, so importing
# the module (for reuse or testing) does not block on a running server.
if __name__ == "__main__":
    demo.launch()