Spaces:

Ramzan0553
/

Diagrams_Text_Detection

Runtime error

App Files Files Community

Diagrams_Text_Detection / app.py

Ramzan0553

Update app.py

646700a verified 17 days ago

raw

history blame contribute delete

3.32 kB

	import gradio as gr
	import cv2
	import pytesseract
	from pytesseract import Output
	import numpy as np

	def text_detection(img, config="--psm 11 --oem 3", min_conf=20):
	    """Run Tesseract on *img* and sort the recognised words by box shape.

	    Args:
	        img: Image in any form accepted by ``pytesseract.image_to_data``.
	        config: Tesseract command-line options, passed through unchanged.
	        min_conf: Words whose confidence is not strictly greater than this
	            value are ignored.

	    Returns:
	        A ``(horizontal_text, vertical_text, data)`` tuple. A word lands in
	        ``horizontal_text`` when its box is wider than it is tall and in
	        ``vertical_text`` otherwise. ``data`` is the unmodified dict
	        returned by ``pytesseract.image_to_data``.
	    """
	    data = pytesseract.image_to_data(img, config=config, output_type=Output.DICT)
	    horizontal_text = []
	    vertical_text = []

	    columns = zip(data['text'], data['conf'], data['width'], data['height'])
	    for text, conf, width, height in columns:
	        # float() accepts ints, floats and numeric strings such as "96.5";
	        # int() raises ValueError on a fractional string.
	        if float(conf) <= min_conf:
	            continue
	        # Skip boxes that carry no visible text so they do not show up as
	        # empty "words" in the result lists.
	        if not str(text).strip():
	            continue
	        if width > height:
	            horizontal_text.append(text)
	        else:
	            vertical_text.append(text)
	    return horizontal_text, vertical_text, data

	def draw_boxes(img, data, min_conf=20):
	    """Draw a green rectangle and text label on *img* for each OCR word.

	    Args:
	        img: Image array to draw on. It is modified in place.
	        data: Dict with parallel ``'text'``, ``'conf'``, ``'left'``,
	            ``'top'``, ``'width'`` and ``'height'`` sequences.
	        min_conf: Words whose confidence is not strictly greater than this
	            value are not drawn.

	    Returns:
	        The same *img* object, after drawing.
	    """
	    columns = zip(
	        data['text'], data['conf'],
	        data['left'], data['top'], data['width'], data['height'],
	    )
	    for text, conf, x, y, w, h in columns:
	        # float() tolerates fractional confidence strings that int() rejects.
	        if float(conf) <= min_conf:
	            continue
	        # A box without visible text would only add an unlabeled rectangle.
	        if not str(text).strip():
	            continue
	        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
	        # Labels normally sit just above the box; for boxes near the top
	        # edge that position falls outside the image, so place the label
	        # underneath the box instead.
	        label_y = y - 10 if y >= 20 else y + h + 15
	        cv2.putText(img, text, (x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
	    return img

	def word_level_accuracy(data, ground_truth):
	    """Return the percentage of distinct ground-truth words found by OCR.

	    Both sides are compared as sets of whitespace-separated words, so
	    repeated words and word order do not affect the score. Matching is
	    case-sensitive.

	    Args:
	        data: Dict whose ``'text'`` entry is a sequence of OCR tokens.
	        ground_truth: Reference text; ``None`` is treated as empty.

	    Returns:
	        A float in ``[0, 100]``; ``0.0`` when the ground truth has no words.
	    """
	    gt_words = set((ground_truth or "").split())
	    if not gt_words:
	        # Nothing to compare against; also avoids dividing by zero.
	        return 0.0

	    ocr_words = set()
	    for token in data['text']:
	        # split() drops blank tokens and breaks multi-word tokens apart.
	        ocr_words.update(str(token).split())

	    return len(gt_words & ocr_words) / len(gt_words) * 100

	def character_level_accuracy(data, ground_truth):
	    """Return the percentage of distinct ground-truth characters seen by OCR.

	    Both sides are compared as sets of non-whitespace characters, so
	    repeated characters and their order do not affect the score.

	    Args:
	        data: Dict whose ``'text'`` entry is a sequence of OCR tokens.
	        ground_truth: Reference text; ``None`` is treated as empty.

	    Returns:
	        A float in ``[0, 100]``; ``0.0`` when the ground truth contains no
	        non-whitespace characters.
	    """
	    # Remove every kind of whitespace, not only spaces: a multi-line ground
	    # truth would otherwise count "\n" as a character OCR can never match.
	    gt_chars = set("".join((ground_truth or "").split()))
	    if not gt_chars:
	        # Nothing to compare against; also avoids dividing by zero.
	        return 0.0

	    ocr_chars = set()
	    for token in data['text']:
	        ocr_chars.update("".join(str(token).split()))

	    return len(gt_chars & ocr_chars) / len(gt_chars) * 100

	def process(image, ground_truth):
	    """Run OCR on *image*, score it against *ground_truth* and annotate it.

	    Args:
	        image: RGB image as a NumPy array, or an object ``np.array`` can
	            convert (for example a PIL image). ``None`` means no upload.
	        ground_truth: Reference text typed by the user; ``None`` is treated
	            as empty.

	    Returns:
	        A ``(annotated_image, report)`` tuple. ``annotated_image`` is an RGB
	        array with boxes drawn on it, or ``None`` when no image was given.
	        ``report`` is a Markdown string.
	    """
	    if image is None:
	        return None, "Please upload an image."

	    # Convert to NumPy array if it's a PIL Image
	    if not isinstance(image, np.ndarray):
	        image = np.array(image)

	    # An untouched textbox may deliver None instead of an empty string.
	    ground_truth = ground_truth or ""

	    # Run OCR on the RGB image: pytesseract interprets arrays as RGB, so
	    # the BGR conversion is only needed for the OpenCV drawing step below.
	    h_text, v_text, data = text_detection(image)
	    word_acc = word_level_accuracy(data, ground_truth)
	    char_acc = character_level_accuracy(data, ground_truth)

	    # Draw boxes in OpenCV's BGR layout and convert back to RGB for display.
	    # cvtColor returns a new array, so the caller's image is not modified.
	    img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
	    result_img = draw_boxes(img_bgr, data)
	    result_img_rgb = cv2.cvtColor(result_img, cv2.COLOR_BGR2RGB)

	    # Separate entries with blank lines; Markdown folds a single newline
	    # into the preceding line, which would merge the two accuracy figures.
	    report_lines = [
	        f"Horizontal Text: {' '.join(h_text) if h_text else 'None'}",
	        f"Vertical Text: {' '.join(v_text) if v_text else 'None'}",
	        f"Word-Level Accuracy: {word_acc:.2f}%",
	        f"Character-Level Accuracy: {char_acc:.2f}%",
	    ]
	    results = "\n\n".join(report_lines)

	    return result_img_rgb, results

	# Widgets handed to `process`, in the order of its positional parameters.
	_input_widgets = [
	    gr.Image(type="numpy", label="Upload Image"),
	    gr.Textbox(
	        lines=4,
	        placeholder="Enter ground truth text here",
	        label="Ground Truth",
	    ),
	]

	# Widgets that receive the two values returned by `process`.
	_output_widgets = [
	    gr.Image(type="numpy", label="Detected Text with Bounding Boxes"),
	    gr.Markdown(),
	]

	# Gradio UI wiring the OCR pipeline to an image upload and a text box.
	demo = gr.Interface(
	    title="OCR Accuracy Evaluator with Bounding Boxes",
	    description="Upload an image and ground truth text to evaluate Tesseract OCR accuracy by word and character. Bounding boxes are drawn around detected text.",
	    fn=process,
	    inputs=_input_widgets,
	    outputs=_output_widgets,
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()