Spaces:
Sleeping
Sleeping
File size: 1,940 Bytes
10e9b7d 3c4371f 03cbe8f 10e9b7d 03cbe8f 31243f4 03cbe8f 3c4371f 03cbe8f 3c4371f 03cbe8f e80aab9 36ed51a 03cbe8f e80aab9 03cbe8f 7d65c66 03cbe8f e80aab9 03cbe8f e80aab9 03cbe8f e80aab9 03cbe8f e80aab9 7e4a06b 31243f4 03cbe8f e80aab9 03cbe8f e80aab9 03cbe8f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import os
import gradio as gr
import pandas as pd
from langchain_core.messages import HumanMessage
from graph_builder import build_graph
from api_client import fetch_questions, submit_answers
class GaiaAgent:
    """Callable wrapper around the graph produced by ``build_graph``."""

    def __init__(self):
        # Build the graph once at construction; every call reuses it.
        self.graph = build_graph()

    def __call__(self, question):
        """Invoke the graph on *question* and return its ``final_answer`` entry.

        The graph is invoked with a fresh state dict holding only the
        ``"question"`` key; a missing ``"final_answer"`` key in the resulting
        state propagates as ``KeyError``.
        """
        return self.graph.invoke({"question": question})["final_answer"]
def run_and_submit_all(profile: "gr.OAuthProfile | None"):
    """Run the agent on every fetched question and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in. The type annotation is required: the click listener
            declares no ``inputs``, and Gradio supplies this argument based on
            the ``gr.OAuthProfile`` hint. It is written as a string so it is
            resolved lazily rather than at definition time.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with one row per attempted question, or ``None``
        when the run stopped before any question was attempted.
    """
    username = profile.username if profile else None
    if not username:
        return "Please log in to Hugging Face.", None

    # NOTE(review): when SPACE_ID is unset the link contains the literal
    # "None"; kept as-is because the submission endpoint's requirements are
    # not visible from here.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # This function is the UI boundary: failures are reported in the status
    # box instead of surfacing as an unhandled exception in the event handler.
    try:
        agent = GaiaAgent()
    except Exception as exc:
        return f"Error initializing agent: {exc}", None

    try:
        questions_data = fetch_questions()
    except Exception as exc:
        return f"Error fetching questions: {exc}", None
    if not questions_data:
        return "Fetched questions list is empty.", None

    answers_payload = []
    results_log = []
    for item in questions_data:
        task_id = item["task_id"]
        question = item["question"]
        try:
            answer = agent(question)
        except Exception as exc:
            # One failing question must not discard the answers already
            # collected; record the failure in the table and move on.
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question,
                    "Submitted Answer": f"AGENT ERROR: {exc}",
                }
            )
            continue
        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append(
            {"Task ID": task_id, "Question": question, "Submitted Answer": answer}
        )

    results_df = pd.DataFrame(results_log)
    if not answers_payload:
        return "Agent did not produce any answers to submit.", results_df

    try:
        result = submit_answers(username, agent_code, answers_payload)
    except Exception as exc:
        # Keep the per-question table so the run is not lost with the upload.
        return f"Submission Failed: {exc}", results_df

    final_status = (
        f"Submission Successful!\n"
        f"User: {result.get('username')}\n"
        f"Score: {result.get('score')}%\n"
        f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}\n"
        f"Message: {result.get('message', '')}"
    )
    return final_status, results_df
# Assemble the Gradio UI. Components are created inside the Blocks context in
# the order they are listed here.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA LangGraph Agent")
    gr.Markdown("Log in and run your agent to evaluate on GAIA benchmark.")
    # Login control; run_and_submit_all returns a "Please log in" status when
    # it receives no profile/username.
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status", lines=5)
    results_table = gr.DataFrame(label="Results")
    # The handler's two return values fill the status box and the results
    # table, in that order.
    # NOTE(review): no `inputs` are wired here, so the handler's `profile`
    # argument has to be supplied by Gradio itself — confirm the handler's
    # signature allows that.
    run_button.click(run_and_submit_all, outputs=[status_output, results_table])
# Start the Gradio app only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    demo.launch()
|