"""Gradio app that runs the GAIA agent on every fetched question and submits the answers."""

import os
import traceback
from typing import Optional

import gradio as gr
import pandas as pd

from api_client import fetch_questions, submit_answers
from graph_builder import build_graph


class GaiaAgent:
    """Callable wrapper around the compiled graph returned by ``build_graph``."""

    def __init__(self):
        """Build the graph and compile it once so every call reuses the executor."""
        self.graph = build_graph()
        self.executor = self.graph.compile()

    def __call__(self, question):
        """Run the graph on one question and return its answer.

        Args:
            question: The question passed to the graph under the
                ``"question"`` state key.

        Returns:
            The ``"final_answer"`` entry of the resulting state, or
            ``"No answer generated."`` when that key is absent.
        """
        # Explicitly pass the state dictionary
        result_state = self.executor.invoke({"question": question})
        return result_state.get("final_answer", "No answer generated.")


def run_and_submit_all(profile: Optional[gr.OAuthProfile]):
    """Answer every fetched question with the agent and submit the results.

    Args:
        profile: The logged-in user's OAuth profile. Gradio injects it
            automatically because of the ``gr.OAuthProfile`` annotation and
            passes ``None`` when nobody is logged in. A plain username string
            is also accepted for callers that invoke this function directly.

    Returns:
        A ``(status, results)`` tuple. ``status`` is a human-readable message;
        ``results`` is a ``pandas.DataFrame`` with one row per answered
        question, or ``None`` when the run did not reach submission.
    """
    # Top-level UI boundary: report any failure in the status box instead of
    # letting it propagate into Gradio.
    try:
        space_id = os.getenv("SPACE_ID")

        # The profile must come from Gradio's OAuth injection. Wiring the
        # LoginButton in as an event input would deliver the button's label
        # text, which is always truthy and is not a username.
        if isinstance(profile, str):
            username = profile
        else:
            username = getattr(profile, "username", None)
        if not username:
            return "❌ Please log in to Hugging Face.", None

        # NOTE(review): SPACE_ID is unset outside a Space, which makes this
        # link contain "None" - confirm whether the API tolerates that.
        agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
        agent = GaiaAgent()

        questions_data = fetch_questions()
        if not questions_data:
            return "❌ No questions retrieved from GAIA API.", None

        answers_payload = []
        results_log = []
        for item in questions_data:
            task_id = item["task_id"]
            question = item["question"]
            answer = agent(question)
            answers_payload.append(
                {"task_id": task_id, "submitted_answer": answer}
            )
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question,
                    "Submitted Answer": answer,
                }
            )

        # The response is read with .get(), so it is presumably dict-like.
        result = submit_answers(username, agent_code, answers_payload)
        final_status = (
            f"✅ Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score')}%\n"
            f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}\n"
            f"Message: {result.get('message', '')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        error_message = (
            f"❌ An error occurred:\n{str(e)}\n\n"
            f"Traceback:\n{traceback.format_exc()}"
        )
        return error_message, None


with gr.Blocks() as demo:
    gr.Markdown("# 🤖 GAIA LangGraph Agent")
    gr.Markdown("Log in and run your agent to evaluate on the GAIA benchmark.")

    login_button = gr.LoginButton()
    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(label="Run Status", lines=10)
    results_table = gr.DataFrame(label="Results")

    # No `inputs`: the OAuth profile is injected through the type annotation
    # on run_and_submit_all, not read from a component.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )


if __name__ == "__main__":
    demo.launch()