|
|
|
|
|
import os |
|
import gradio as gr |
|
import requests |
|
import pandas as pd |
|
|
|
from agent import make_agent |
|
|
|
# Base URL of the scoring service: questions are read from `/questions`
# and answers are posted to `/submit`.
API_URL = "https://agents-course-unit4-scoring.hf.space"


def _extract_final_answer(raw) -> str:
    """Return the last non-blank line of the agent output, stripped.

    The output is coerced to ``str`` first so that a non-string result
    (a number, ``None``, ...) does not crash the line splitting. If every
    line is blank, the whole stripped text (possibly empty) is returned.
    """
    text = "" if raw is None else str(raw)
    for line in reversed(text.splitlines()):
        if line.strip():
            return line.strip()
    return text.strip()


def _format_status(result: dict) -> str:
    """Build the success message from the scoring service's JSON reply."""
    # `.get` everywhere: a missing field must not turn an already accepted
    # submission into a crash in the UI.
    return (
        "✅ Submission Successful!\n"
        f"User: {result.get('username')}\n"
        f"Score: {result.get('score')}% "
        f"({result.get('correct_count')}/{result.get('total_attempted')})\n"
        f"{result.get('message')}"
    )


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every question and submit the answers for scoring.

    Args:
        profile: Logged-in user's OAuth profile, or ``None`` when nobody is
            logged in. Only ``profile.username`` is read.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message
        (success summary or error description). ``table`` is a
        ``pandas.DataFrame`` with one row per answered question
        (``Task ID``, ``Question``, ``Answer``), or ``None`` when the run
        stopped before any question was processed.

    Failures (agent creation, network/HTTP/JSON errors, a single question
    crashing the agent) are reported through ``status`` instead of being
    raised, so the UI always receives something to display.
    """
    if not profile:
        return "🔒 Por favor, haz login con Hugging Face.", None

    username = profile.username
    # SPACE_ID is read from the environment; when it is unset the code link
    # below is built with an empty space id.
    space_id = os.getenv("SPACE_ID", "")

    try:
        agent = make_agent()
    except Exception as exc:  # noqa: BLE001 - UI boundary, opaque factory
        return f"❌ Error al crear el agente: {exc}", None

    # --- Fetch the questions -------------------------------------------
    try:
        resp = requests.get(f"{API_URL}/questions", timeout=20)
        resp.raise_for_status()
        questions = resp.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a body that is not valid JSON.
        return f"❌ Error al obtener las preguntas: {exc}", None

    if not questions:
        return "⚠️ El servidor no devolvió ninguna pregunta.", None

    # --- Run the agent on each question --------------------------------
    answers, log = [], []
    for item in questions:
        tid, text = item.get("task_id"), item.get("question")
        if tid is None or text is None:
            # Malformed entry: nothing meaningful to ask or to submit.
            continue

        prompt = (
            "Responde SÓLO con la respuesta final, sin explicaciones.\n"
            f"{text}\n"
            "Respuesta:"
        )
        try:
            ans = _extract_final_answer(agent.run(prompt))
        except Exception as exc:  # noqa: BLE001 - keep the other answers
            # One failing question must not discard the whole evaluation.
            ans = f"AGENT ERROR: {exc}"

        answers.append({"task_id": tid, "submitted_answer": ans})
        log.append({"Task ID": tid, "Question": text, "Answer": ans})

    if not answers:
        return "⚠️ No se generó ninguna respuesta para enviar.", pd.DataFrame(log)

    # --- Submit the answers --------------------------------------------
    payload = {
        "username": username,
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": answers,
    }
    try:
        sub = requests.post(f"{API_URL}/submit", json=payload, timeout=60)
        sub.raise_for_status()
        result = sub.json()
    except (requests.RequestException, ValueError) as exc:
        # Still return the table so the computed answers are not lost.
        return f"❌ Error al enviar las respuestas: {exc}", pd.DataFrame(log)

    return _format_status(result), pd.DataFrame(log)
|
|
|
|
|
# Gradio UI. Widgets are created in display order: title, instructions,
# login button, trigger button, then the two output widgets.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 GAIA Final Agent Mini")
    gr.Markdown("Haz login y pulsa el botón para evaluar tu agente en GAIA.")
    gr.LoginButton()

    run_btn = gr.Button("Run Evaluation & Submit All Answers")

    # Outputs filled by the click handler: a status message and a table.
    status = gr.Textbox(label="Resultado", lines=6, interactive=False)
    table = gr.Dataframe(label="Preguntas y Respuestas", wrap=True)

    # No `inputs` are wired here; the handler's `profile` argument is
    # presumably supplied by Gradio's OAuth integration from its type
    # annotation - confirm against the Gradio documentation.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status, table],
    )


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)