Ramzan0553's picture
Update app.py
646700a verified
from collections import Counter

import cv2
import gradio as gr
import numpy as np
import pytesseract
from pytesseract import Output
def text_detection(img, config="--psm 11 --oem 3", min_conf=20):
    """Run Tesseract on ``img`` and split confident words by box orientation.

    Args:
        img: Image handed unchanged to ``pytesseract.image_to_data``.
        config: Tesseract command-line options, forwarded unchanged.
        min_conf: Entries are kept only when their confidence, truncated to
            an integer, is strictly greater than this value.

    Returns:
        A ``(horizontal_text, vertical_text, data)`` tuple. The first list
        holds kept words whose box is wider than tall, the second holds the
        remaining kept words, and ``data`` is the dictionary returned by
        ``pytesseract.image_to_data``.
    """
    data = pytesseract.image_to_data(img, config=config, output_type=Output.DICT)
    horizontal_text = []
    vertical_text = []
    # Missing columns are treated as empty so an unexpected result shape
    # yields no words rather than a KeyError.
    entries = zip(
        data.get('text', []),
        data.get('conf', []),
        data.get('width', []),
        data.get('height', []),
    )
    for text, conf, w, h in entries:
        # The confidence may be an int, a float or a decimal string such as
        # '96.06'; int() alone raises ValueError on the latter.
        if int(float(conf)) <= min_conf:
            continue
        # Whitespace-only tokens carry no text worth reporting.
        if not text.strip():
            continue
        if w > h:
            horizontal_text.append(text)
        else:
            vertical_text.append(text)
    return horizontal_text, vertical_text, data
def draw_boxes(img, data, min_conf=20):
    """Draw a green rectangle and text label for each confident OCR entry.

    Args:
        img: Image to draw on. It is modified in place.
        data: Dictionary with parallel ``'text'``, ``'conf'``, ``'left'``,
            ``'top'``, ``'width'`` and ``'height'`` lists.
        min_conf: Entries are drawn only when their confidence, truncated to
            an integer, is strictly greater than this value.

    Returns:
        The same ``img`` object that was passed in.
    """
    # Missing columns are treated as empty so nothing is drawn instead of
    # raising a KeyError.
    entries = zip(
        data.get('text', []),
        data.get('conf', []),
        data.get('left', []),
        data.get('top', []),
        data.get('width', []),
        data.get('height', []),
    )
    for text, conf, x, y, w, h in entries:
        # The confidence may be an int, a float or a decimal string such as
        # '96.06'; int() alone raises ValueError on the latter.
        if int(float(conf)) <= min_conf:
            continue
        # A box around a whitespace-only token would mark no visible text.
        if not text.strip():
            continue
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(img, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    return img
def word_level_accuracy(data, ground_truth):
    """Return the percentage of ground-truth words found in the OCR output.

    Words are matched as a multiset: a word that appears twice in the ground
    truth must be recognised twice to count fully. Word order is ignored.

    Args:
        data: Dictionary whose ``'text'`` entry is a list of OCR tokens.
        ground_truth: Reference text; ``None`` is treated as empty.

    Returns:
        A value between 0 and 100, or 0 when the ground truth has no words.
    """
    gt_counts = Counter((ground_truth or "").split())
    total = sum(gt_counts.values())
    if not total:
        return 0.0
    ocr_text = ' '.join(text for text in data.get('text', []) if text.strip())
    ocr_counts = Counter(ocr_text.split())
    # Counter intersection keeps the minimum count per word, so repeated
    # words are credited only as often as they were actually recognised.
    matched = sum((gt_counts & ocr_counts).values())
    return matched / total * 100
def character_level_accuracy(data, ground_truth):
    """Return the percentage of ground-truth characters found in the OCR output.

    Characters are matched as a multiset, so each occurrence in the ground
    truth needs its own occurrence in the OCR text. All whitespace (spaces,
    tabs, newlines) is ignored on both sides, and character order is ignored.

    Args:
        data: Dictionary whose ``'text'`` entry is a list of OCR tokens.
        ground_truth: Reference text; ``None`` is treated as empty.

    Returns:
        A value between 0 and 100, or 0 when the ground truth has no
        non-whitespace characters.
    """
    # split()/join drops every kind of whitespace, not only ' ', so line
    # breaks typed into a multi-line ground truth are not counted as errors.
    gt_counts = Counter(''.join((ground_truth or "").split()))
    total = sum(gt_counts.values())
    if not total:
        return 0.0
    ocr_counts = Counter(''.join(''.join(data.get('text', [])).split()))
    # Counter intersection keeps the minimum count per character.
    matched = sum((gt_counts & ocr_counts).values())
    return matched / total * 100
def process(image, ground_truth):
    """Run OCR on an uploaded image and report accuracy against a reference.

    Args:
        image: Uploaded image as a NumPy array, or any object that
            ``np.array`` can convert; ``None`` when nothing was uploaded.
        ground_truth: Reference text to score the OCR output against;
            ``None`` is treated as empty.

    Returns:
        An ``(annotated_image, markdown_report)`` tuple. When ``image`` is
        ``None`` the result is ``(None, "Please upload an image.")``.
    """
    if image is None:
        return None, "Please upload an image."

    # Convert to NumPy array if it's a PIL Image
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    ground_truth = ground_truth or ""

    # Normalise to three RGB channels; the RGB->BGR conversion below rejects
    # single-channel and four-channel arrays.
    if image.ndim == 2:
        img_rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    elif image.shape[2] == 4:
        img_rgb = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
    else:
        img_rgb = image

    # OCR runs on the RGB image: pytesseract assumes RGB channel order, so
    # feeding it the BGR copy would swap the red and blue channels.
    h_text, v_text, data = text_detection(img_rgb)
    word_acc = word_level_accuracy(data, ground_truth)
    char_acc = character_level_accuracy(data, ground_truth)

    # Draw on a BGR copy (OpenCV's convention) and convert back for display.
    # cvtColor returns a new array, so the uploaded image is never modified.
    img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
    result_img = draw_boxes(img_bgr, data)
    result_img_rgb = cv2.cvtColor(result_img, cv2.COLOR_BGR2RGB)

    # Blank lines between entries so Markdown renders each one as its own
    # paragraph; a single newline would merge the two accuracy lines.
    results = "\n\n".join([
        f"**Horizontal Text**: {' '.join(h_text) if h_text else 'None'}",
        f"**Vertical Text**: {' '.join(v_text) if v_text else 'None'}",
        f"**Word-Level Accuracy**: {word_acc:.2f}%",
        f"**Character-Level Accuracy**: {char_acc:.2f}%",
    ])
    return result_img_rgb, results
# Input widgets: the image to analyse and the reference text to score against.
image_input = gr.Image(type="numpy", label="Upload Image")
ground_truth_input = gr.Textbox(
    lines=4,
    placeholder="Enter ground truth text here",
    label="Ground Truth",
)

# Output widgets: the annotated image and the Markdown report from process().
annotated_output = gr.Image(type="numpy", label="Detected Text with Bounding Boxes")
report_output = gr.Markdown()

# Gradio interface wiring process() to the widgets above.
demo = gr.Interface(
    fn=process,
    inputs=[image_input, ground_truth_input],
    outputs=[annotated_output, report_output],
    title="OCR Accuracy Evaluator with Bounding Boxes",
    description="Upload an image and ground truth text to evaluate Tesseract OCR accuracy by word and character. Bounding boxes are drawn around detected text.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()