File size: 1,586 Bytes
22b92ea
98b1c09
 
 
 
 
 
b991439
98b1c09
 
 
 
 
 
b991439
 
 
 
 
98b1c09
b991439
98b1c09
 
 
 
 
b991439
98b1c09
 
 
 
 
 
 
 
 
 
 
b991439
98b1c09
 
 
 
 
 
 
 
22b92ea
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import torch
from transformers import VisionEncoderDecoderModel, DonutProcessor
from PIL import Image
from pdf2image import convert_from_bytes
import gradio as gr

# Donut model configuration.
# MODEL_ID is the checkpoint identifier handed to both from_pretrained() calls
# below, so the model weights and the processor come from the same checkpoint.
MODEL_ID = "mychen76/invoice-and-receipts_donut_v1"
print("Cargando modelo Donut...")
# Both objects are created once at import time and reused by every request.
model = VisionEncoderDecoderModel.from_pretrained(MODEL_ID)
processor = DonutProcessor.from_pretrained(MODEL_ID)
# Switch the model to evaluation mode; this file only runs inference.
model.eval()

# Document processing
def _load_pages(file_path):
    """Return the pages of the document at *file_path* as a list of PIL images."""
    # Compare the extension case-insensitively so that "SCAN.PDF" is rasterized
    # as a PDF instead of being handed to the image decoder.
    if file_path.lower().endswith(".pdf"):
        with open(file_path, "rb") as pdf_file:
            return convert_from_bytes(pdf_file.read(), dpi=300)

    # Image.open() keeps the underlying file open; convert() returns a new,
    # fully loaded image, so the handle can be released as soon as it is done.
    with Image.open(file_path) as source_image:
        return [source_image.convert("RGB")]


def process_document(file_path):
    """Run the Donut model on every page of a PDF or image file.

    Args:
        file_path: Path of the uploaded file. A path ending in ".pdf" (any
            letter case) is rasterized page by page at 300 dpi; anything else
            is opened as a single image and converted to RGB.

    Returns:
        A list with one decoded string per page, in page order. An empty list
        is returned when no file was supplied (``None`` or an empty path).

    Raises:
        Whatever ``open``, ``pdf2image`` or Pillow raise for a missing or
        unreadable file; those errors are not caught here.
    """
    # Submitting without a file yields no path; return an empty result instead
    # of failing on the extension check.
    if not file_path:
        return []

    results = []
    for page in _load_pages(file_path):
        # Preprocess the page into model inputs. The former ``max_patches``
        # argument was dropped: it is a Pix2Struct image-processor option and
        # is not part of the Donut processor's documented parameters.
        inputs = processor(page, return_tensors="pt")
        # Inference only, so gradient tracking is disabled.
        # NOTE(review): Donut checkpoints are usually prompted with a task
        # start token (decoder_input_ids) and an explicit max_length, and their
        # output is usually parsed with processor.token2json(); this call
        # relies on the checkpoint's generation defaults and returns plain
        # text. Confirm the output is complete for this model.
        with torch.no_grad():
            outputs = model.generate(**inputs)
        # Decode the generated token ids of the (single) batch entry to text.
        results.append(processor.batch_decode(outputs, skip_special_tokens=True)[0])

    return results

# Gradio interface.
# The file input uses type="filepath", so process_document receives the
# uploaded file as a filesystem path; its return value is rendered as JSON.
# The title and description are user-facing Spanish text; their accented
# characters had been mis-decoded (mojibake) and are restored here.
iface = gr.Interface(
    fn=process_document,
    inputs=gr.File(label="Sube tu factura o recibo (PDF o imagen)", type="filepath"),
    outputs="json",
    title="Donut OCR - Extracción de datos de facturas",
    description="Sube un PDF o imagen y extrae información estructurada (número de factura, fecha, monto, etc.) utilizando Donut OCR."
)

# Start the application only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    iface.launch()