ocrvalidator / app.py
petchutney's picture
Update app.py
5121916 verified
import gradio as gr
import numpy as np
from PIL import Image
from transformers import pipeline
# OCR stage: image-to-text pipeline used to read the uploaded answer.
# The original author notes this checkpoint as their best pick for academic
# handwriting. NOTE(review): confirm it actually suits handwritten input.
# Alternative OCR models listed by the author:
#   "mfrashad/arabic-handwriting-ocr", "TesseractOCR"
ocr_pipe = pipeline(
    task="image-to-text",
    model="facebook/nougat-base",
)

# Embedding stage: feature-extraction pipeline used to compare the answer
# with the reference text (described by the author as lightweight).
similarity_pipe = pipeline(
    task="feature-extraction",
    model="sentence-transformers/paraphrase-albert-small-v2",
)
def _mean_embedding(text):
    """Return a single pooled embedding vector for *text*.

    The feature-extraction output is flattened to (tokens, hidden) and
    averaged over tokens, so texts with different token counts still yield
    vectors of the same length.
    """
    features = np.asarray(similarity_pipe(text), dtype=float)
    return features.reshape(-1, features.shape[-1]).mean(axis=0)


def _cosine_similarity(vec_a, vec_b):
    """Return the cosine similarity of two vectors (0.0 if either is all zeros)."""
    scale = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    if scale == 0:
        return 0.0
    return float(np.dot(vec_a, vec_b) / scale)


def validate_answer(image, user_text, correct_answer):
    """Check an uploaded or typed answer against the reference answer.

    Args:
        image: Uploaded answer as a NumPy array, or ``None`` when no image
            was provided. When present it is OCR'd and replaces *user_text*.
        user_text: Typed answer; may be ``None`` or empty.
        correct_answer: Reference text to compare against.

    Returns:
        A list of three strings: ``[status, ocr_result, comparison]``.
        Entries that do not apply are left empty. If anything fails, all
        three entries carry the same error message.
    """
    outputs = ["", "", ""]  # status, OCR result, comparison
    try:
        # OCR processing. ``if image:`` would raise on a NumPy array
        # (ambiguous truth value), so test for presence explicitly.
        if image is not None:
            img = Image.fromarray(image.astype("uint8"))
            ocr_result = ocr_pipe(img)
            user_text = ocr_result[0]["generated_text"]
            outputs[1] = f"📝 Extracted: {user_text}"

        # A cleared textbox may arrive as None; treat it as empty text.
        user_text = user_text or ""

        # Clarity check (simple rule-based): share of alphanumeric characters.
        clarity = sum(c.isalnum() for c in user_text) / max(1, len(user_text))
        is_clear = clarity > 0.5
        outputs[0] = f"✅ Clarity: {clarity:.0%}" if is_clear else "⚠️ Unclear handwriting"

        # Only compare if the text is clear.
        if is_clear:
            # Embed each text separately: token counts differ between texts,
            # so a joint array would be ragged. Cosine similarity keeps the
            # score within [-1, 1] so the percentage format is meaningful.
            similarity = _cosine_similarity(
                _mean_embedding(correct_answer or ""),
                _mean_embedding(user_text),
            )
            outputs[2] = f"🔍 Similarity: {similarity:.1%}"
    except Exception as e:
        # UI boundary: surface the failure in every output box instead of crashing.
        outputs = [f"❌ Error: {e}"] * 3
    return outputs
# Simple interface: upload or type an answer, then compare it with the
# reference answer. Components are bound to names so the click handler is
# wired to the widgets the user actually sees; passing fresh gr.Image() /
# gr.Textbox() instances would render extra, unlabelled inputs and leave the
# labelled ones disconnected.
with gr.Blocks() as demo:
    gr.Markdown("## Free Handwriting Validator")
    with gr.Row():
        image_input = gr.Image(label="Upload Answer", sources=["upload"], type="numpy")
        typed_input = gr.Textbox(label="Or Type Answer")
    reference_input = gr.Textbox(
        label="Correct Answer",
        value="Photosynthesis occurs in chloroplasts.",
    )
    validate_button = gr.Button("Validate")

    # Output boxes, in the order validate_answer returns its three strings.
    status_output = gr.Textbox(label="Status")
    ocr_output = gr.Textbox(label="OCR Result")
    comparison_output = gr.Textbox(label="Comparison")

    validate_button.click(
        validate_answer,
        inputs=[image_input, typed_input, reference_input],
        outputs=[status_output, ocr_output, comparison_output],
    )

demo.launch(debug=True)