"""Gradio app: run Tesseract OCR on an image and score it against ground truth.

The app draws a box around every confidently detected word, splits the
detections into horizontal/vertical text by box aspect ratio, and reports
set-based word-level and character-level accuracy.
"""

import cv2
import gradio as gr
import numpy as np
import pytesseract
from pytesseract import Output

# Detections whose Tesseract confidence is not strictly above this are ignored.
MIN_CONFIDENCE = 20


def _confidence(value) -> int:
    """Return a Tesseract confidence as an int, or -1 if it cannot be parsed.

    Depending on the pytesseract/Tesseract versions, the ``conf`` column may
    hold ints, floats, or strings such as ``'96.28'`` and ``'-1'``; a bare
    ``int('96.28')`` raises ``ValueError``, so go through ``float`` first.
    """
    try:
        return int(float(value))
    except (TypeError, ValueError):
        return -1


def _confident_words(data: dict, min_conf: int = MIN_CONFIDENCE):
    """Yield ``(index, text)`` for non-blank detections above *min_conf*."""
    for i, text in enumerate(data['text']):
        # Skip whitespace-only entries so they are neither listed nor boxed.
        if _confidence(data['conf'][i]) > min_conf and text.strip():
            yield i, text


def _ocr_tokens(data: dict) -> list:
    """Return every whitespace-separated token found in ``data['text']``."""
    return ' '.join(data['text']).split()


def _to_rgb(image) -> np.ndarray:
    """Return *image* as a 3-channel RGB NumPy array.

    Accepts anything ``np.array`` can convert (e.g. a PIL image). Grayscale
    (2-D) and RGBA (4-channel) inputs are expanded/reduced to RGB; any other
    array is returned unchanged and assumed to be RGB already.
    """
    arr = image if isinstance(image, np.ndarray) else np.array(image)
    if arr.ndim == 2:
        return cv2.cvtColor(arr, cv2.COLOR_GRAY2RGB)
    if arr.ndim == 3 and arr.shape[2] == 4:
        return cv2.cvtColor(arr, cv2.COLOR_RGBA2RGB)
    return arr


def text_detection(img, config="--psm 11 --oem 3", min_conf=MIN_CONFIDENCE):
    """Run Tesseract on *img* and split confident words by box orientation.

    Args:
        img: Image passed straight to ``pytesseract.image_to_data``.
        config: Tesseract command-line options.
        min_conf: Only detections with confidence strictly above this count.

    Returns:
        ``(horizontal_text, vertical_text, data)`` where the first two are
        lists of words whose box is wider than tall / not wider than tall,
        and ``data`` is the raw dict returned by pytesseract.
    """
    data = pytesseract.image_to_data(img, config=config, output_type=Output.DICT)
    horizontal_text = []
    vertical_text = []
    for i, text in _confident_words(data, min_conf):
        if data['width'][i] > data['height'][i]:
            horizontal_text.append(text)
        else:
            vertical_text.append(text)
    return horizontal_text, vertical_text, data


def draw_boxes(img, data, min_conf=MIN_CONFIDENCE):
    """Draw a green box and label on *img* for each confident detection.

    *img* is modified in place and also returned. Pass a copy to keep the
    original untouched.
    """
    green = (0, 255, 0)  # identical in RGB and BGR channel order
    for i, text in _confident_words(data, min_conf):
        x, y = data['left'][i], data['top'][i]
        w, h = data['width'][i], data['height'][i]
        cv2.rectangle(img, (x, y), (x + w, y + h), green, 2)
        # Put the label above the box, or below it when there is no room
        # above (otherwise the text would be drawn outside the image).
        label_y = y - 10 if y - 10 >= 12 else y + h + 15
        cv2.putText(img, text, (x, label_y), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, green, 1)
    return img


def word_level_accuracy(data, ground_truth):
    """Return the percentage of distinct ground-truth words found by OCR.

    Words are compared as sets of whitespace-separated tokens (exact,
    case-sensitive match). Returns 0.0 when *ground_truth* has no words.
    """
    gt_words = set((ground_truth or "").split())
    if not gt_words:
        return 0.0
    ocr_words = set(_ocr_tokens(data))
    return len(gt_words & ocr_words) / len(gt_words) * 100


def character_level_accuracy(data, ground_truth):
    """Return the percentage of distinct ground-truth characters found by OCR.

    Characters are compared as sets, ignoring all whitespace (spaces, tabs
    and newlines) on both sides. Returns 0.0 when *ground_truth* contains no
    non-whitespace characters.
    """
    # split()/join removes every kind of whitespace, not just " ", so a
    # multi-line ground truth is not penalised for its newlines.
    gt_chars = set("".join((ground_truth or "").split()))
    if not gt_chars:
        return 0.0
    ocr_chars = set("".join(_ocr_tokens(data)))
    return len(gt_chars & ocr_chars) / len(gt_chars) * 100


def process(image, ground_truth):
    """Gradio callback: OCR *image*, annotate it, and build the report.

    Args:
        image: RGB image as a NumPy array (or anything ``np.array`` accepts),
            or ``None`` when nothing was uploaded.
        ground_truth: Expected text; ``None`` is treated as empty.

    Returns:
        ``(annotated_rgb_image, markdown_report)``, or
        ``(None, "Please upload an image.")`` when *image* is ``None``.
    """
    if image is None:
        return None, "Please upload an image."

    # pytesseract assumes RGB input, so OCR runs on the RGB image directly
    # rather than on a BGR conversion.
    rgb = _to_rgb(image)
    ground_truth = ground_truth or ""

    h_text, v_text, data = text_detection(rgb)
    word_acc = word_level_accuracy(data, ground_truth)
    char_acc = character_level_accuracy(data, ground_truth)

    # Draw on a contiguous copy so the caller's array is never modified.
    result_img = draw_boxes(np.ascontiguousarray(rgb).copy(), data)

    # Blank lines between entries: a single newline does not start a new
    # line in rendered Markdown.
    results = "\n\n".join([
        f"**Horizontal Text**: {' '.join(h_text) if h_text else 'None'}",
        f"**Vertical Text**: {' '.join(v_text) if v_text else 'None'}",
        f"**Word-Level Accuracy**: {word_acc:.2f}%",
        f"**Character-Level Accuracy**: {char_acc:.2f}%",
    ])
    return result_img, results


demo = gr.Interface(
    fn=process,
    inputs=[
        gr.Image(type="numpy", label="Upload Image"),
        gr.Textbox(lines=4, placeholder="Enter ground truth text here", label="Ground Truth"),
    ],
    outputs=[
        gr.Image(type="numpy", label="Detected Text with Bounding Boxes"),
        gr.Markdown(),
    ],
    title="OCR Accuracy Evaluator with Bounding Boxes",
    description="Upload an image and ground truth text to evaluate Tesseract OCR accuracy by word and character. Bounding boxes are drawn around detected text.",
)


if __name__ == "__main__":
    demo.launch()