File size: 2,405 Bytes
71d590c 41ba21f 10e9b7d eccf8e4 3c4371f 41ba21f 71d590c 10e9b7d 41ba21f e80aab9 b20fcf6 41ba21f b20fcf6 41ba21f 71d590c 1115dd8 41ba21f 71d590c b20fcf6 41ba21f b20fcf6 41ba21f b20fcf6 41ba21f b20fcf6 41ba21f 71d590c 41ba21f 71d590c 41ba21f b20fcf6 41ba21f 1115dd8 41ba21f 1115dd8 41ba21f 1115dd8 54e22ca 1115dd8 41ba21f 71d590c 41ba21f b20fcf6 41ba21f b20fcf6 71d590c b20fcf6 1115dd8 41ba21f e80aab9 41ba21f b20fcf6 41ba21f e80aab9 1115dd8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# app.py
import os
import gradio as gr
import requests
import pandas as pd
from agent import make_agent
# Base URL of the scoring service. This file appends "/questions" (GET) and
# "/submit" (POST) to it.
API_URL = "https://agents-course-unit4-scoring.hf.space"
def _last_non_empty_line(raw) -> str:
    """Return the last non-blank line of *raw*, stripped.

    *raw* is coerced to ``str`` first so that a non-string agent result does
    not crash the caller; ``None`` and all-blank output yield ``""``.
    """
    text = "" if raw is None else str(raw)
    for line in reversed(text.splitlines()):
        stripped = line.strip()
        if stripped:
            return stripped
    return ""


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, run the agent on each one and submit the answers.

    Args:
        profile: The logged-in user's OAuth profile, or ``None``/falsy when
            nobody is logged in. The click handler is registered without
            explicit inputs, so this value is presumably injected by Gradio
            based on the type annotation (see the Gradio OAuth docs).

    Returns:
        A ``(status, table)`` tuple: ``status`` is a human-readable message
        and ``table`` is a ``pandas.DataFrame`` with one row per question
        (``Task ID``, ``Question``, ``Answer``), or ``None`` when no question
        was processed.

    Raises:
        requests.HTTPError: If the questions or submit endpoint answers with
            an error status (via ``raise_for_status``). Other ``requests``
            errors such as timeouts propagate unchanged.
    """
    # 0) Check login.
    if not profile:
        return "🔒 Por favor, haz login con Hugging Face.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID", "")

    # 1) Instantiate the agent.
    agent = make_agent()

    # 2) Download the question list.
    resp = requests.get(f"{API_URL}/questions", timeout=20)
    resp.raise_for_status()
    questions = resp.json()
    if not questions:
        # Nothing to answer: do not post an empty submission.
        return "⚠️ El servidor no devolvió ninguna pregunta.", None

    # 3) For each question: build the prompt, run the agent and keep only the
    #    last non-empty line of its output as the answer.
    answers, log = [], []
    for q in questions:
        tid, text = q["task_id"], q["question"]
        prompt = (
            "Responde SÓLO con la respuesta final, sin explicaciones.\n"
            f"{text}\n"
            "Respuesta:"
        )
        try:
            raw = agent.run(prompt)
        except Exception as exc:
            # The agent is opaque here; a failure on one task must not throw
            # away the answers already computed for the others. The failure
            # is shown in the table but not submitted.
            log.append(
                {"Task ID": tid, "Question": text, "Answer": f"AGENT ERROR: {exc}"}
            )
            continue

        ans = _last_non_empty_line(raw)
        answers.append({"task_id": tid, "submitted_answer": ans})
        log.append({"Task ID": tid, "Question": text, "Answer": ans})

    if not answers:
        return (
            "⚠️ El agente no produjo ninguna respuesta para enviar.",
            pd.DataFrame(log),
        )

    # 4) Send the payload.
    payload = {
        "username": username,
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": answers,
    }
    sub = requests.post(f"{API_URL}/submit", json=payload, timeout=60)
    sub.raise_for_status()
    result = sub.json()

    # 5) Show the result and the table. ``.get`` is used for every field so a
    #    missing key cannot hide the outcome of an already accepted submission.
    status = (
        "✅ Submission Successful!\n"
        f"User: {result.get('username')}\n"
        f"Score: {result.get('score')}% "
        f"({result.get('correct_count')}/{result.get('total_attempted')})\n"
        f"{result.get('message')}"
    )
    return status, pd.DataFrame(log)
# --- Gradio interface ---
# Components are created in display order inside the Blocks context: title,
# instructions, login button, run button, then the two outputs.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 GAIA Final Agent Mini")
    gr.Markdown("Haz login y pulsa el botón para evaluar tu agente en GAIA.")
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status = gr.Textbox(label="Resultado", lines=6, interactive=False)
    table = gr.Dataframe(label="Preguntas y Respuestas", wrap=True)
    # No explicit inputs: the handler's only parameter is the OAuth profile.
    # Its two return values fill the status box and the results table.
    run_btn.click(fn=run_and_submit_all, outputs=[status, table])

if __name__ == "__main__":
    demo.launch(debug=True)