|
import os
|
|
import gradio as gr
|
|
import requests
|
|
import pandas as pd
|
|
import logging
|
|
import time
|
|
import traceback
|
|
from typing import Dict, Any, Optional, Tuple, List, Union
|
|
|
|
|
|
|
|
# Base URL of the scoring service; run_and_submit_all() derives the
# "/questions" and "/submit" endpoint URLs from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
|
|
from gaia_agent import GAIAAgent
|
|
|
|
|
|
# Root logging setup: INFO level, timestamped records that include the logger
# name and severity. The module-wide logger is named "gaia_evaluation".
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

logger = logging.getLogger("gaia_evaluation")
|
|
|
|
|
|
def initialize_agent():
    """Construct the GAIA agent and report the outcome in the log.

    Returns:
        The new ``GAIAAgent`` instance, or ``None`` when construction raised.
        In the failure case the error message and the full traceback are
        logged at ERROR level.
    """
    try:
        gaia_agent = GAIAAgent()
        logger.info("GAIA Agent initialized successfully")
        return gaia_agent
    except Exception as exc:
        logger.error("Error initializing GAIA Agent: %s", exc)
        logger.error(traceback.format_exc())
    return None
|
|
|
|
|
|
def _fetch_questions(questions_url: str) -> Tuple[Optional[List[Any]], Optional[str]]:
    """Download and validate the question list.

    Returns:
        ``(questions, None)`` on success, or ``(None, error_message)`` when the
        request fails, the body is not JSON, or the payload is not a
        non-empty list.
    """
    logger.info(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        try:
            questions_data = response.json()
        except ValueError as e:
            # Handled in its own inner ``try``: requests' JSONDecodeError is
            # also a RequestException, so a sibling handler placed after
            # ``except RequestException`` would never be reached.
            error_msg = f"Error decoding JSON response from questions endpoint: {e}"
            logger.error(f"{error_msg}\nResponse text: {response.text[:500]}")
            return None, error_msg
    except requests.exceptions.RequestException as e:
        error_msg = f"Error fetching questions: {e}"
        logger.error(error_msg)
        return None, error_msg
    except Exception as e:
        error_msg = f"An unexpected error occurred fetching questions: {e}"
        logger.error(error_msg)
        logger.error(traceback.format_exc())
        return None, error_msg

    if not isinstance(questions_data, list):
        # The processing loop needs a list of objects; reject anything else
        # here instead of failing later on ``item.get``.
        logger.warning(
            f"Fetched questions payload is not a list (got {type(questions_data).__name__})."
        )
        return None, "Fetched questions list is empty or invalid format."
    if not questions_data:
        logger.warning("Fetched questions list is empty.")
        return None, "Fetched questions list is empty or invalid format."

    logger.info(f"Fetched {len(questions_data)} questions.")
    return questions_data, None


def _answer_questions(
    agent: Any, questions_data: List[Any]
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Run the agent on every question.

    Returns:
        ``(answers_payload, results_log)``. ``answers_payload`` holds one
        ``{"task_id", "submitted_answer"}`` entry per successfully answered
        question; ``results_log`` holds one display row per processed question,
        including the ones where the agent raised.
    """
    results_log: List[Dict[str, Any]] = []
    answers_payload: List[Dict[str, Any]] = []
    total = len(questions_data)
    logger.info(f"Running agent on {total} questions...")

    for i, item in enumerate(questions_data):
        if not isinstance(item, dict):
            logger.warning(f"Skipping item that is not an object: {item!r}")
            continue

        # Fall back to "id", then to a positional label, when "task_id" is absent.
        task_id = item.get("task_id", item.get("id", f"q{i+1}"))
        question_text = item.get("question")

        if not task_id or question_text is None:
            logger.warning(f"Skipping item with missing task_id or question: {item}")
            continue

        # str() keeps the preview from raising if the question is not a string.
        logger.info(f"Processing question {i+1}/{total}: {str(question_text)[:50]}...")
        start_time = time.perf_counter()

        try:
            submitted_answer = agent.process_question(question_text)
            processing_time = time.perf_counter() - start_time

            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": submitted_answer,
                "Processing Time": f"{processing_time:.2f}s",
                "Status": "Success",
            })
            logger.info(f"Question {i+1} processed successfully in {processing_time:.2f}s")
        except Exception as e:
            # One failing question must not abort the run: record it in the
            # results table and leave it out of the submission payload.
            error_msg = f"Error running agent on task {task_id}: {e}"
            logger.error(error_msg)
            logger.error(traceback.format_exc())
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"AGENT ERROR: {e}",
                "Processing Time": f"{time.perf_counter() - start_time:.2f}s",
                "Status": "Error",
            })

    return answers_payload, results_log


def _submit_answers(submit_url: str, submission_data: Dict[str, Any], username: str) -> str:
    """POST the answers to the scoring endpoint and return a status message.

    Never raises: every failure mode is logged and turned into a
    "Submission Failed: ..." (or "unexpected error") message.
    """
    logger.info(f"Submitting {len(submission_data['answers'])} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()

        correct_count = result_data.get("correct_count", 0)
        total_attempted = result_data.get("total_attempted", 0)
        score = result_data.get("score", "N/A")

        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username', username)}\n"
            f"Overall Score: {score}% "
            f"({correct_count}/{total_attempted} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        logger.info("Submission successful.")
        return final_status
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        # Best effort: prefer the JSON "detail" field, else show the raw body.
        try:
            error_json = e.response.json()
        except ValueError:
            error_json = None
        if isinstance(error_json, dict):
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        else:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        logger.error(status_message)
        return status_message
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        logger.error(status_message)
        return status_message
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        logger.error(status_message)
        return status_message
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        logger.error(status_message)
        logger.error(traceback.format_exc())
        return status_message


def run_and_submit_all(profile=None) -> Tuple[str, Optional[pd.DataFrame]]:
    """
    Fetches all questions, runs the GAIA Agent on them, submits all answers,
    and displays the results.

    Args:
        profile: The logged-in user. An object with a ``username`` attribute is
            used as-is; any other truthy value is converted with ``str()`` and
            treated as the username. A falsy value means "not logged in".
            NOTE(review): a plain string is trusted verbatim as the username;
            confirm the caller never passes UI text (e.g. a button label) here.

    Returns:
        ``(status_message, results)`` where ``results`` is a DataFrame with one
        row per processed question, or ``None`` when the run stopped before any
        question was processed (not logged in, agent init or fetch failure).
    """
    space_id = os.getenv("SPACE_ID", "")

    if not profile:
        logger.warning("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    if hasattr(profile, 'username'):
        username = str(profile.username).strip()
        logger.info(f"User logged in: {username}")
    else:
        username = str(profile).strip()
        logger.info(f"Using provided username: {username}")
    if not username:
        # A whitespace-only name cannot identify a submitter.
        logger.warning("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = initialize_agent()
    except Exception as e:
        error_msg = f"Error instantiating agent: {e}"
        logger.error(error_msg)
        return error_msg, None
    if agent is None:
        error_msg = "Error initializing GAIA Agent. Check logs for details."
        logger.error(error_msg)
        return error_msg, None

    if not space_id:
        logger.warning("SPACE_ID is not set; the agent code URL will not point to a real Space.")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    logger.info(f"Using agent code URL: {agent_code}")

    questions_data, fetch_error = _fetch_questions(questions_url)
    if fetch_error is not None:
        return fetch_error, None

    answers_payload, results_log = _answer_questions(agent, questions_data)
    results_df = pd.DataFrame(results_log)

    if not answers_payload:
        logger.warning("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", results_df

    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    status_update = (
        f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    )
    logger.info(status_update)

    return _submit_answers(submit_url, submission_data, username), results_df
|
|
|
|
|
|
|
|
# Gradio UI: instructions, Hugging Face login button, a status box, a results
# table and the button that starts the evaluation run.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the GAIA agent, submit answers, and see the score.

        ---
        **Note:** Running the evaluation may take some time as the agent processes all questions. Please be patient.
        """
    )

    login_btn = gr.LoginButton()

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    def _run_for_logged_in_user(profile: Optional[gr.OAuthProfile]):
        """Forward the OAuth profile injected by Gradio to the evaluation run.

        Gradio fills a parameter annotated with ``gr.OAuthProfile`` from the
        login session (``None`` when nobody is logged in). Wiring the login
        button as an ordinary input would pass the button's text instead, and
        that text would then be submitted as the username.
        """
        return run_and_submit_all(profile)

    run_button.click(
        fn=_run_for_logged_in_user,
        outputs=[status_output, results_table],
    )
|
|
|
|
if __name__ == "__main__":
    # Startup banner: report the Hugging Face Space environment (if any), then
    # launch the Gradio app with settings suited to where it is running.
    print("\n" + "-"*30 + " App Starting " + "-"*30)

    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST normally already ends in ".hf.space"; only append the
        # suffix when it is missing so the printed URL is never doubled.
        if space_host_startup.endswith(".hf.space"):
            runtime_host = space_host_startup
        else:
            runtime_host = f"{space_host_startup}.hf.space"
        print(f" Runtime URL should be: https://{runtime_host}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for GAIA Agent Evaluation...")

    is_running_in_space = bool(space_host_startup and space_id_startup)

    # In a Space: bind to all interfaces with debug off. Locally: keep Gradio's
    # default host and turn debug on.
    launch_kwargs = {"debug": not is_running_in_space, "share": False}
    if is_running_in_space:
        launch_kwargs["server_name"] = "0.0.0.0"
    demo.launch(**launch_kwargs)