# Spaces: Paused
from pathlib import Path

import gradio as gr
import torch
from pdf2image import convert_from_bytes
from PIL import Image
from transformers import VisionEncoderDecoderModel, DonutProcessor
# Donut model configuration: Hugging Face model id of the checkpoint to load.
MODEL_ID = "mychen76/invoice-and-receipts_donut_v1"

print("Cargando modelo Donut...")
# The processor and the model are loaded once, at import time, from the same
# checkpoint. eval() returns the module itself, so chaining it leaves `model`
# bound to the loaded network in evaluation mode.
processor = DonutProcessor.from_pretrained(MODEL_ID)
model = VisionEncoderDecoderModel.from_pretrained(MODEL_ID).eval()
# Document processing
def process_document(file_path):
    """Run the Donut model over an uploaded invoice or receipt.

    A file whose suffix is ``.pdf`` (compared case-insensitively) is
    rasterized at 300 DPI and every page is processed in order. Any other
    file is opened as a single image and converted to RGB.

    Args:
        file_path: Path of the uploaded file, as handed over by the Gradio
            ``File`` input. May be ``None`` or empty when nothing was
            uploaded.

    Returns:
        A list with one decoded string per page (one element for a plain
        image). The list is empty when ``file_path`` is ``None`` or empty.
    """
    # Without this guard a missing upload crashed with AttributeError on
    # ``None.endswith``; an absent document simply yields no results.
    if not file_path:
        return []

    source = Path(file_path)
    # Lower-case the suffix so "SCAN.PDF" is not mistaken for an image.
    if source.suffix.lower() == ".pdf":
        pages = convert_from_bytes(source.read_bytes(), dpi=300)
    else:
        # convert() returns a new image, so the file handle can be released
        # as soon as the conversion is done.
        with Image.open(source) as raw_image:
            pages = [raw_image.convert("RGB")]

    results = []
    for page in pages:
        # ``max_patches`` (passed by the previous version of this call) is a
        # Pix2Struct preprocessing option, not a Donut one, so it is omitted.
        inputs = processor(page, return_tensors="pt")

        # Inference only: no gradients are needed.
        with torch.no_grad():
            # NOTE(review): generation runs with the checkpoint's default
            # settings and no explicit task prompt. Donut checkpoints are
            # commonly prompted with a task token and a larger max_length;
            # confirm against the model card before changing this.
            outputs = model.generate(**inputs)

        decoded = processor.batch_decode(outputs, skip_special_tokens=True)[0]
        results.append(decoded)
    return results
# Gradio interface: one file upload in, the list returned by
# process_document out, rendered as JSON.
iface = gr.Interface(
    fn=process_document,
    inputs=gr.File(label="Sube tu factura o recibo (PDF o imagen)", type="filepath"),
    outputs="json",
    # The accented characters in the two strings below were mis-encoded
    # (UTF-8 bytes decoded with the wrong codec); they are restored here so
    # the UI shows proper Spanish text.
    title="Donut OCR - Extracción de datos de facturas",
    description=(
        "Sube un PDF o imagen y extrae información estructurada "
        "(número de factura, fecha, monto, etc.) utilizando Donut OCR."
    ),
)

# Start the application only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()