"""Gradio front end that runs the graph-based agent on fetched questions and submits the answers."""

import os
from typing import Optional

import gradio as gr
import pandas as pd
from langchain_core.messages import HumanMessage

from graph_builder import build_graph
from api_client import fetch_questions, submit_answers


class GaiaAgent:
    """Callable wrapper around the graph returned by ``build_graph``."""

    def __init__(self):
        self.graph = build_graph()

    def __call__(self, question):
        """Invoke the graph with ``{"question": question}`` and return its ``"final_answer"`` entry."""
        result_state = self.graph.invoke({"question": question})
        return result_state["final_answer"]


def run_and_submit_all(profile: Optional[gr.OAuthProfile]):
    """Run the agent on every fetched question and submit the collected answers.

    Args:
        profile: Profile of the logged-in user, or ``None`` when nobody is
            logged in. The ``Optional[gr.OAuthProfile]`` annotation is what
            makes Gradio inject this value; the click handler passes no
            explicit inputs.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with one row per question once the agent loop
        has run, and ``None`` when the run stops before that point.
    """
    username = profile.username if profile else None
    if not username:
        return "Please log in to Hugging Face.", None

    # os.getenv returns None when SPACE_ID is unset, in which case the link
    # below contains the literal text "None".
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # UI boundary: report setup failures in the status box instead of raising.
    try:
        agent = GaiaAgent()
        questions_data = fetch_questions()
    except Exception as exc:
        return f"Error preparing evaluation: {exc}", None

    if not questions_data:
        return "No questions were fetched.", None

    answers_payload = []
    results_log = []
    for item in questions_data:
        task_id = item["task_id"]
        question = item["question"]
        try:
            answer = agent(question)
        except Exception as exc:
            # A failure on one task must not discard the answers already
            # collected; record it in the log and move on to the next task.
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question,
                    "Submitted Answer": f"AGENT ERROR: {exc}",
                }
            )
            continue
        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append(
            {"Task ID": task_id, "Question": question, "Submitted Answer": answer}
        )

    results_df = pd.DataFrame(results_log)
    if not answers_payload:
        return "Agent did not produce any answers to submit.", results_df

    try:
        result = submit_answers(username, agent_code, answers_payload)
    except Exception as exc:
        # Keep the per-question table visible even when submission fails.
        return f"Submission Failed: {exc}", results_df

    final_status = (
        "Submission Successful!\n"
        f"User: {result.get('username')}\n"
        f"Score: {result.get('score')}%\n"
        f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}\n"
        f"Message: {result.get('message', '')}"
    )
    return final_status, results_df


with gr.Blocks() as demo:
    gr.Markdown("# GAIA LangGraph Agent")
    gr.Markdown("Log in and run your agent to evaluate on GAIA benchmark.")
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status", lines=5)
    results_table = gr.DataFrame(label="Results")
    # No explicit inputs: the profile argument is supplied by Gradio based on
    # the handler's type annotation.
    run_button.click(run_and_submit_all, outputs=[status_output, results_table])


if __name__ == "__main__":
    demo.launch()