Spaces:
Paused
Paused
File size: 1,586 Bytes
22b92ea 98b1c09 b991439 98b1c09 b991439 98b1c09 b991439 98b1c09 b991439 98b1c09 b991439 98b1c09 22b92ea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import torch
from transformers import VisionEncoderDecoderModel, DonutProcessor
from PIL import Image
from pdf2image import convert_from_bytes
import gradio as gr
# Donut model configuration: identifier passed to from_pretrained() below.
MODEL_ID = "mychen76/invoice-and-receipts_donut_v1"
print("Cargando modelo Donut...")
# Load the model and its processor once at import time; process_document()
# reuses these module-level objects on every call.
model = VisionEncoderDecoderModel.from_pretrained(MODEL_ID)
processor = DonutProcessor.from_pretrained(MODEL_ID)
# Put the model in evaluation mode; this file only runs generation.
model.eval()
def process_document(file_path):
    """Run the Donut model on every page of an uploaded PDF or image.

    Args:
        file_path: Path of the uploaded file. Names ending in ``.pdf``
            (any letter case) are rasterized page by page at 300 dpi;
            anything else is opened as a single image and converted to RGB.

    Returns:
        A list with one decoded string per page, in page order. The list is
        empty when no file was provided.
    """
    # Nothing was uploaded: return an empty result instead of failing on None.
    if not file_path:
        return []

    # Compare the extension case-insensitively so "SCAN.PDF" is not
    # mistaken for an image.
    if file_path.lower().endswith(".pdf"):
        with open(file_path, "rb") as pdf_file:
            images = convert_from_bytes(pdf_file.read(), dpi=300)
    else:
        # convert() returns an independent copy, so the source file can be
        # closed as soon as the RGB image has been produced.
        with Image.open(file_path) as source_image:
            images = [source_image.convert("RGB")]

    results = []
    # Inference only: one no-grad context covers every page.
    with torch.no_grad():
        for img in images:
            # Preprocess the page into model-ready tensors.
            inputs = processor(img, return_tensors="pt")
            # NOTE(review): generate() is called without an explicit task
            # prompt (decoder_input_ids) or max_length; this relies on the
            # checkpoint's own generation defaults -- confirm against the
            # model card.
            outputs = model.generate(**inputs)
            # NOTE(review): skip_special_tokens=True may strip Donut field
            # tags if the checkpoint registers them as special tokens --
            # confirm the decoded text keeps the structure you need.
            result = processor.batch_decode(outputs, skip_special_tokens=True)[0]
            results.append(result)
    return results
# Gradio interface: a single file upload whose path is passed to
# process_document(); the returned list is rendered as JSON.
iface = gr.Interface(
    fn=process_document,
    inputs=gr.File(label="Sube tu factura o recibo (PDF o imagen)", type="filepath"),
    outputs="json",
    # The accented characters in the user-facing strings below were
    # mis-encoded; they are restored to proper Spanish text.
    title="Donut OCR - Extracción de datos de facturas",
    description="Sube un PDF o imagen y extrae información estructurada (número de factura, fecha, monto, etc.) utilizando Donut OCR.",
)
# Start the application only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|