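"""Gradio demo: evaluate Tesseract OCR output against a user-supplied ground truth.

The app detects text with pytesseract, draws bounding boxes around each word,
and reports set-based word- and character-level accuracy.
Requires the Tesseract binary to be installed and discoverable on PATH
(e.g. `apt-get install tesseract-ocr`) in addition to the Python packages below.
"""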
import gradio as gr
import cv2
import pytesseract
from pytesseract import Output
import numpy as np

def text_detection(img, config="--psm 11 --oem 3"):
    """Run Tesseract on the image and split detected words into horizontal and
    vertical text based on the shape of their bounding boxes."""
    data = pytesseract.image_to_data(img, config=config, output_type=Output.DICT)
    horizontal_text = []
    vertical_text = []

    for i in range(len(data['text'])):
        # 'conf' may come back as a string, int, or float depending on the
        # pytesseract version, so cast through float before comparing.
        if float(data['conf'][i]) > 20:
            w, h = data['width'][i], data['height'][i]
            text = data['text'][i]
            # Heuristic: boxes wider than they are tall are treated as horizontal text.
            if w > h:
                horizontal_text.append(text)
            else:
                vertical_text.append(text)
    return horizontal_text, vertical_text, data

def draw_boxes(img, data):
    """Draw a green bounding box and the recognized word above each confident detection."""
    for i in range(len(data['text'])):
        if float(data['conf'][i]) > 20:
            x, y, w, h = data['left'][i], data['top'][i], data['width'][i], data['height'][i]
            text = data['text'][i]
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(img, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    return img

def word_level_accuracy(data, ground_truth):
    """Percentage of unique ground-truth words that appear anywhere in the OCR
    output (set intersection, not positional or edit-distance accuracy)."""
    ocr_text = ' '.join([text for text in data['text'] if text.strip()])
    gt_words = set(ground_truth.split())
    ocr_words = set(ocr_text.split())
    correct = gt_words.intersection(ocr_words)
    return (len(correct) / len(gt_words)) * 100 if gt_words else 0

def character_level_accuracy(data, ground_truth):
    """Percentage of unique ground-truth characters (ignoring spaces) that also
    occur somewhere in the OCR output."""
    ocr_text = ''.join([text.strip() for text in data['text']])
    gt_chars = set(ground_truth.replace(" ", ""))
    ocr_chars = set(ocr_text.replace(" ", ""))
    correct = gt_chars.intersection(ocr_chars)
    return (len(correct) / len(gt_chars)) * 100 if gt_chars else 0

def process(image, ground_truth):
    if image is None:
        return None, "Please upload an image."

    # Convert to NumPy array if it's a PIL Image
    if not isinstance(image, np.ndarray):
        image = np.array(image)

    # Convert RGB to BGR for OpenCV
    img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Run OCR and accuracy checks
    h_text, v_text, data = text_detection(img_bgr)
    word_acc = word_level_accuracy(data, ground_truth)
    char_acc = character_level_accuracy(data, ground_truth)

    # Draw boxes and convert back to RGB for display
    result_img = draw_boxes(img_bgr.copy(), data)
    result_img_rgb = cv2.cvtColor(result_img, cv2.COLOR_BGR2RGB)

    results = f"**Horizontal Text**: {' '.join(h_text) if h_text else 'None'}\n\n"
    results += f"**Vertical Text**: {' '.join(v_text) if v_text else 'None'}\n\n"
    results += f"**Word-Level Accuracy**: {word_acc:.2f}%\n"
    results += f"**Character-Level Accuracy**: {char_acc:.2f}%"

    return result_img_rgb, results

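# Gradio interface: image upload + ground-truth textbox in, annotated image + markdown report out.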
demo = gr.Interface(
    fn=process,
    inputs=[
        gr.Image(type="numpy", label="Upload Image"),
        gr.Textbox(lines=4, placeholder="Enter ground truth text here", label="Ground Truth")
    ],
    outputs=[
        gr.Image(type="numpy", label="Detected Text with Bounding Boxes"),
        gr.Markdown()
    ],
    title="OCR Accuracy Evaluator with Bounding Boxes",
    description="Upload an image and ground truth text to evaluate Tesseract OCR accuracy by word and character. Bounding boxes are drawn around detected text."
)

if __name__ == "__main__":
    demo.launch()