"""Gradio demo that extracts printed text from an image with Microsoft's TrOCR."""

import logging
from pathlib import Path

import gradio as gr
from PIL import Image
from transformers import pipeline

logger = logging.getLogger(__name__)

# Initialize TrOCR pipeline for specialized OCR.
ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-base-printed")

# Sample images offered in the UI. They are optional: only the files that are
# actually present in the working directory are registered with Gradio.
EXAMPLE_FILES = ("example1.jpg", "example2.png")


def extract_text_from_image(image):
    """Run OCR on an uploaded image and return the recognized text.

    Args:
        image: The value delivered by the ``gr.Image`` input, which is
            configured with ``type="numpy"``; ``None`` when nothing has been
            uploaded.

    Returns:
        The ``generated_text`` of the first pipeline result, or a
        human-readable message when no image was supplied or extraction
        failed. The function never raises, so the message is what the
        output textbox displays.
    """
    if image is None:
        return "No image provided. Please upload an image file."

    try:
        # Convert Gradio image to PIL Image.
        pil_image = Image.fromarray(image)
        # Extract text from image using TrOCR.
        result = ocr_pipeline(pil_image)
        return result[0]["generated_text"]
    except Exception as e:  # UI boundary: report the failure instead of crashing.
        # Keep the traceback in the server log; the user only sees the summary.
        logger.exception("Text extraction failed")
        return f"Error during text extraction: {e}"


def _available_examples():
    """Return Gradio example rows for the sample images that exist on disk."""
    return [[name] for name in EXAMPLE_FILES if Path(name).is_file()]


# Gradio interface
with gr.Blocks(title="Image Text Extractor") as demo:
    gr.Markdown("# 📷 Image Text Extractor")
    gr.Markdown("Extract text from images using Microsoft's TrOCR model")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(
                type="numpy",
                label="Upload Image",
            )
            extract_btn = gr.Button("Extract Text", variant="primary")

        with gr.Column():
            text_output = gr.Textbox(
                lines=10,
                label="Extracted Text",
                interactive=False,
            )

    extract_btn.click(
        extract_text_from_image,
        inputs=image_input,
        outputs=text_output,
    )

    # Skip the examples widget entirely when no sample image is available, so
    # missing files do not prevent the app from starting.
    example_rows = _available_examples()
    if example_rows:
        gr.Examples(
            examples=example_rows,
            inputs=[image_input],
        )

    gr.Markdown("### About This Model")
    gr.Markdown("- **Model**: [microsoft/trocr-base-printed](https://huggingface.co/microsoft/trocr-base-printed)")
    gr.Markdown("- **Task**: Optical Character Recognition (OCR)")
    gr.Markdown("- **Architecture**: Transformer-based OCR (TrOCR)")
    gr.Markdown("- **Capabilities**: Specialized for printed text extraction")
    gr.Markdown("- **Note**: First processing may take 15-25 seconds (model loading)")
    gr.Markdown("- **Supported Formats**: JPG, PNG, JPEG")

if __name__ == "__main__":
    demo.launch()