# app.py
import os

import gradio as gr
import pandas as pd
import requests

from agent import make_agent

API_URL = "https://agents-course-unit4-scoring.hf.space"


def _last_nonempty_line(raw: object) -> str:
    """Return the last non-blank line of *raw*, stripped of whitespace.

    *raw* is coerced to text first because nothing visible in this file
    guarantees that ``agent.run`` returns a ``str``; ``None`` is treated as
    an empty answer. If every line is blank, the stripped text (i.e. an
    empty string) is returned.
    """
    text = "" if raw is None else str(raw)
    for line in reversed(text.splitlines()):
        if line.strip():
            return line.strip()
    return text.strip()


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every question from the scoring API and submit.

    Args:
        profile: The logged-in Hugging Face profile, or ``None`` when nobody
            is logged in. The click handler below registers this function
            without explicit inputs, so Gradio is expected to supply the
            value from the login session (see the Gradio OAuth docs).

    Returns:
        A ``(status, table)`` pair for the two output components: a
        human-readable status message and a ``pandas.DataFrame`` with one
        row per processed question (``None`` when no question was processed).

    Network and JSON-decoding failures are reported through the status
    message instead of being raised, so the answers computed so far remain
    visible in the table.
    """
    # 0) Require a logged-in user.
    if not profile:
        return "🔒 Por favor, haz login con Hugging Face.", None

    username = profile.username
    # NOTE(review): when SPACE_ID is unset the agent_code URL below contains
    # an empty path segment -- confirm whether the scoring API accepts that.
    space_id = os.getenv("SPACE_ID", "")

    # 1) Build the agent.
    agent = make_agent()

    # 2) Fetch the question list from the scoring API.
    try:
        resp = requests.get(f"{API_URL}/questions", timeout=20)
        resp.raise_for_status()
        questions = resp.json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a body that is not valid JSON.
        return f"❌ No se pudieron descargar las preguntas: {err}", None

    # 3) For each question: build the prompt, run the agent and keep the
    #    last non-empty line of its output as the answer.
    answers, log = [], []
    for item in questions:
        task_id, question = item["task_id"], item["question"]
        prompt = (
            "Responde SÓLO con la respuesta final, sin explicaciones.\n"
            f"{question}\n"
            "Respuesta:"
        )
        try:
            answer = _last_nonempty_line(agent.run(prompt))
        except Exception as err:
            # Boundary around third-party agent code: a single failing
            # question must not discard the answers already computed.
            # The failure is shown in the table and not submitted.
            log.append(
                {
                    "Task ID": task_id,
                    "Question": question,
                    "Answer": f"AGENT ERROR: {err}",
                }
            )
            continue
        answers.append({"task_id": task_id, "submitted_answer": answer})
        log.append({"Task ID": task_id, "Question": question, "Answer": answer})

    results = pd.DataFrame(log)
    if not answers:
        # Nothing to score; skip the pointless POST.
        return "⚠️ El agente no produjo ninguna respuesta que enviar.", results

    # 4) Send the payload.
    payload = {
        "username": username,
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": answers,
    }
    try:
        sub = requests.post(f"{API_URL}/submit", json=payload, timeout=60)
        sub.raise_for_status()
        result = sub.json()
    except (requests.RequestException, ValueError) as err:
        # Keep the table so the computed answers are not lost.
        return f"❌ Error al enviar las respuestas: {err}", results

    # 5) Show the score summary and the answers table.
    status = (
        f"✅ Submission Successful!\n"
        f"User: {result.get('username')}\n"
        f"Score: {result.get('score')}% "
        f"({result.get('correct_count')}/{result.get('total_attempted')})\n"
        f"{result.get('message')}"
    )
    return status, results


# --- Gradio interface ---
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 GAIA Final Agent Mini")
    gr.Markdown("Haz login y pulsa el botón para evaluar tu agente en GAIA.")
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status = gr.Textbox(label="Resultado", lines=6, interactive=False)
    table = gr.Dataframe(label="Preguntas y Respuestas", wrap=True)

    # Registered inside the Blocks context so the event is attached to demo.
    run_btn.click(fn=run_and_submit_all, outputs=[status, table])

if __name__ == "__main__":
    demo.launch(debug=True)