import gradio as gr
import fitz  # PyMuPDF
import torch
import numpy as np
import os
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
from sentence_transformers import SentenceTransformer

# Environment configuration for the Gradio server
os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
os.environ["GRADIO_SERVER_PORT"] = "7860"

# Configurable constants
PDF_PATH = "reg_2024.pdf"
EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
QA_MODEL = "mrm8488/bert-italian-finedtuned-squadv1-it-alfa"
CHUNK_SIZE = 800   # words per chunk
OVERLAP = 150      # words shared between consecutive chunks
TOP_K_CHUNKS = 3   # chunks retrieved per question
MIN_SCORE = 0.1    # minimum QA confidence required to return an answer

def load_models():
    """Carica i modelli con gestione ottimizzata della memoria"""
    try:
        # Prova a caricare con GPU/quantizzazione
        model = AutoModelForQuestionAnswering.from_pretrained(
            QA_MODEL,
            device_map="auto",
            load_in_4bit=True if torch.cuda.is_available() else False,
            torch_dtype=torch.float16
        )
    except ImportError:
        # Fallback per CPU
        model = AutoModelForQuestionAnswering.from_pretrained(QA_MODEL)
    
    tokenizer = AutoTokenizer.from_pretrained(QA_MODEL)
    embedder = SentenceTransformer(EMBEDDING_MODEL)
    
    return model, tokenizer, embedder
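
# Note: the quantized path above assumes the optional accelerate and bitsandbytes
# packages are installed; otherwise the except branch falls back to a plain CPU load.
# Minimal post-load sanity check (hypothetical usage sketch):
#
#   m, tok, emb = load_models()
#   print(next(m.parameters()).device)  # e.g. cuda:0 or cpu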

def process_pdf():
    """Elabora il PDF e crea gli embeddings"""
    text = ""
    with fitz.open(PDF_PATH) as doc:
        for page in doc:
            text += page.get_text().replace("\n", " ") + " "
    
    # Split into overlapping word-based chunks (CHUNK_SIZE words, stepping by CHUNK_SIZE - OVERLAP)
    words = text.split()
    chunks = [
        ' '.join(words[i:i + CHUNK_SIZE]) 
        for i in range(0, len(words), CHUNK_SIZE - OVERLAP)
    ]
    
    # Compute a sentence embedding for each chunk
    embeddings = model_embed.encode(chunks, convert_to_tensor=True)
    
    return chunks, embeddings
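
# Chunking arithmetic (illustrative): with CHUNK_SIZE=800 and OVERLAP=150 the window
# advances 650 words at a time, so a document of roughly 5,000 words would produce
# range(0, 5000, 650) -> 8 chunks, each sharing 150 words with its neighbor.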

def semantic_search(query, chunks, embeddings):
    """Ricerca semantica dei chunk più rilevanti"""
    query_embed = model_embed.encode(query, convert_to_tensor=True)
    scores = torch.nn.functional.cosine_similarity(query_embed, embeddings)
    top_indices = torch.topk(scores, k=TOP_K_CHUNKS).indices.cpu().numpy()
    return [chunks[i] for i in top_indices]
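
# Hypothetical usage sketch (relies on the globals initialized at startup further below):
#
#   hits = semantic_search("calcio di rigore", doc_chunks, doc_embeddings)
#   print(len(hits))  # -> at most TOP_K_CHUNKS chunks, ordered by similarity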

def answer_question(question):
    """Pipeline completa per la risposta"""
    try:
        # Retrieve the most relevant context chunks from the document
        relevant_chunks = semantic_search(question, doc_chunks, doc_embeddings)
        
        # Initialize the QA pipeline (rebuilt on every call; caching it at startup would be faster)
        qa_pipe = pipeline(
            "question-answering",
            model=model_qa,
            tokenizer=tokenizer,
            device=0 if torch.cuda.is_available() else -1
        )
        
        best_answer = {"answer": "Nessuna risposta trovata", "score": 0}
        
        # Run QA over each relevant chunk and keep the highest-scoring answer
        for chunk in relevant_chunks:
            try:
                result = qa_pipe(
                    question=question,
                    context=chunk,
                    max_answer_len=100,
                    handle_impossible_answer=True
                )
                
                if result["score"] > best_answer["score"]:
                    best_answer = result
            except Exception:
                continue
        
        if best_answer["score"] > MIN_SCORE:
            return best_answer["answer"]
        return "Nessuna risposta sufficientemente certa trovata nel documento"
    
    except Exception as e:
        return f"Errore durante l'elaborazione: {str(e)}"

# Load the models and process the document once at startup
print("Caricamento modelli...")
model_qa, tokenizer, model_embed = load_models()
print("Elaborazione documento...")
doc_chunks, doc_embeddings = process_pdf()

# Gradio interface
with gr.Blocks(title="AI Esperto Regolamento Calcio") as demo:
    gr.Markdown("# ⚽ Assistente Virtuale Regolamento FIFA")
    gr.Markdown("Poni domande sul regolamento ufficiale del calcio")
    
    with gr.Row():
        question = gr.Textbox(
            label="La tua domanda",
            placeholder="Es: Quando si assegna un calcio di rigore?",
            max_lines=2
        )
        answer = gr.Textbox(label="Risposta ufficiale", interactive=False)
    
    examples = gr.Examples(
        examples=[
            ["Quanti cambi sono permessi a partita?"],
            ["Cosa costituisce un fallo da cartellino rosso diretto?"],
            ["Quali sono le dimensioni minime del campo?"]
        ],
        inputs=[question],
        outputs=[answer],
        fn=answer_question,
        cache_examples=True  # answers for the example questions are precomputed at launch
    )
    
    question.submit(fn=answer_question, inputs=[question], outputs=[answer])

if __name__ == "__main__":
    demo.launch(show_error=True, share=True)
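
# Assumed dependencies, inferred from the imports above (not pinned by the original file):
#   pip install gradio pymupdf torch numpy transformers sentence-transformers
#   (plus accelerate and bitsandbytes for the optional 4-bit loading path)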