# Spaces: Runtime error
from collections import Counter

import cv2
import gradio as gr
import numpy as np
import pytesseract
from pytesseract import Output
def text_detection(img, config="--psm 11 --oem 3", min_conf=20):
    """Run Tesseract on *img* and sort confident words by box orientation.

    Args:
        img: Image passed straight to ``pytesseract.image_to_data``.
        config: Extra Tesseract command-line flags.
        min_conf: Rows whose confidence is not strictly greater than this
            value are ignored.

    Returns:
        A ``(horizontal_text, vertical_text, data)`` tuple.  A word is
        "horizontal" when its bounding box is wider than it is tall and
        "vertical" otherwise.  ``data`` is the unfiltered dictionary
        returned by ``pytesseract.image_to_data``.
    """
    data = pytesseract.image_to_data(img, config=config, output_type=Output.DICT)
    horizontal_text = []
    vertical_text = []
    rows = zip(data['text'], data['conf'], data['width'], data['height'])
    for text, conf, w, h in rows:
        # float() rather than int(): the confidence may arrive as a decimal
        # string such as "96.5", on which int() raises ValueError.
        if float(conf) <= min_conf:
            continue
        # Rows with blank text carry no word and would only add stray
        # separators to the joined output.
        if not str(text).strip():
            continue
        if w > h:
            horizontal_text.append(text)
        else:
            vertical_text.append(text)
    return horizontal_text, vertical_text, data
def draw_boxes(img, data, min_conf=20):
    """Draw a green box and text label on *img* for every confident word.

    Args:
        img: Image array to draw on; it is modified in place.
        data: Dictionary with parallel ``'text'``, ``'conf'``, ``'left'``,
            ``'top'``, ``'width'`` and ``'height'`` lists.
        min_conf: Rows whose confidence is not strictly greater than this
            value are skipped.

    Returns:
        The same *img* object that was passed in.
    """
    rows = zip(
        data['text'], data['conf'],
        data['left'], data['top'], data['width'], data['height'],
    )
    for text, conf, x, y, w, h in rows:
        # float() rather than int(): the confidence may arrive as a decimal
        # string such as "96.5", on which int() raises ValueError.
        if float(conf) <= min_conf:
            continue
        # Do not outline rows that contain no visible text.
        if not str(text).strip():
            continue
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # Put the label above the box, unless the box is so close to the top
        # edge that the label would fall outside the image; then go below.
        label_y = y - 10 if y >= 20 else y + h + 15
        cv2.putText(img, text, (x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    return img
def word_level_accuracy(data, ground_truth):
    """Return the percentage of ground-truth words found in the OCR output.

    Words are compared as multisets, so a word that occurs twice in
    *ground_truth* must be recognised twice to count fully.  Comparison is
    case-sensitive and ignores word order.

    Args:
        data: Dictionary whose ``'text'`` entry lists the recognised strings.
        ground_truth: Reference text; ``None`` is treated as empty.

    Returns:
        A percentage between 0 and 100, or 0 when *ground_truth* contains
        no words.
    """
    gt_counts = Counter((ground_truth or "").split())
    if not gt_counts:
        return 0
    ocr_counts = Counter(
        word for text in data['text'] for word in str(text).split()
    )
    # Counter & Counter keeps, per word, the smaller of the two counts.
    matched = sum((gt_counts & ocr_counts).values())
    return (matched / sum(gt_counts.values())) * 100
def character_level_accuracy(data, ground_truth):
    """Return the percentage of ground-truth characters found in the OCR output.

    Characters are compared as multisets after removing all whitespace
    (spaces, tabs and newlines) from both sides, so repeated characters
    must be recognised as many times as they occur.  Comparison is
    case-sensitive and ignores character order.

    Args:
        data: Dictionary whose ``'text'`` entry lists the recognised strings.
        ground_truth: Reference text; ``None`` is treated as empty.

    Returns:
        A percentage between 0 and 100, or 0 when *ground_truth* contains
        no non-whitespace characters.
    """
    # "".join(s.split()) drops every kind of whitespace, not just " ", so
    # line breaks in a multi-line reference are not counted as characters.
    gt_counts = Counter("".join((ground_truth or "").split()))
    if not gt_counts:
        return 0
    ocr_joined = "".join(str(text) for text in data['text'])
    ocr_counts = Counter("".join(ocr_joined.split()))
    # Counter & Counter keeps, per character, the smaller of the two counts.
    matched = sum((gt_counts & ocr_counts).values())
    return (matched / sum(gt_counts.values())) * 100
def process(image, ground_truth):
    """Run OCR on *image*, score it against *ground_truth*, and annotate it.

    Args:
        image: RGB image as a NumPy array, or any object ``np.array`` can
            convert (for example a PIL image).  ``None`` means nothing was
            uploaded.
        ground_truth: Reference text to score against; ``None`` is treated
            as empty.

    Returns:
        A ``(annotated_rgb_image, markdown_report)`` tuple, or
        ``(None, "Please upload an image.")`` when *image* is ``None``.
    """
    if image is None:
        return None, "Please upload an image."

    # Convert to a NumPy array if it is, e.g., a PIL image.
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    ground_truth = ground_truth or ""

    # Run OCR on the RGB array: pytesseract assumes RGB channel order, so
    # the BGR copy is made only for the OpenCV drawing step below.
    h_text, v_text, data = text_detection(image)
    word_acc = word_level_accuracy(data, ground_truth)
    char_acc = character_level_accuracy(data, ground_truth)

    # cvtColor returns a new array, so the caller's image is not modified.
    img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    result_img = draw_boxes(img_bgr, data)
    result_img_rgb = cv2.cvtColor(result_img, cv2.COLOR_BGR2RGB)

    # Blank lines between entries: in Markdown a single newline does not
    # start a new paragraph, so the two accuracy lines would run together.
    results = "\n\n".join([
        f"**Horizontal Text**: {' '.join(h_text) if h_text else 'None'}",
        f"**Vertical Text**: {' '.join(v_text) if v_text else 'None'}",
        f"**Word-Level Accuracy**: {word_acc:.2f}%",
        f"**Character-Level Accuracy**: {char_acc:.2f}%",
    ])
    return result_img_rgb, results
# Gradio UI: an image plus reference text go in; the annotated image and a
# Markdown report come out.
demo = gr.Interface(
    title="OCR Accuracy Evaluator with Bounding Boxes",
    description=(
        "Upload an image and ground truth text to evaluate Tesseract OCR "
        "accuracy by word and character. Bounding boxes are drawn around "
        "detected text."
    ),
    fn=process,
    inputs=[
        gr.Image(type="numpy", label="Upload Image"),
        gr.Textbox(
            lines=4,
            placeholder="Enter ground truth text here",
            label="Ground Truth",
        ),
    ],
    outputs=[
        gr.Image(type="numpy", label="Detected Text with Bounding Boxes"),
        gr.Markdown(),
    ],
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()