File size: 2,629 Bytes
10e9b7d
 
3c4371f
5c8f5e2
4b2e344
03cbe8f
10e9b7d
03cbe8f
31243f4
03cbe8f
4b2e344
3c4371f
03cbe8f
4b2e344
 
 
3c4371f
03cbe8f
5c8f5e2
 
4a62492
5c8f5e2
 
e80aab9
5c8f5e2
 
e80aab9
5c8f5e2
 
 
e80aab9
5c8f5e2
 
e80aab9
5c8f5e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e80aab9
 
5c8f5e2
 
e80aab9
040af41
5c8f5e2
 
03cbe8f
e80aab9
040af41
 
f06b73f
040af41
 
 
e80aab9
03cbe8f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import traceback
from typing import Optional

import gradio as gr
import pandas as pd

from api_client import fetch_questions, submit_answers
from graph_builder import build_graph

class GaiaAgent:
    """Callable wrapper around the compiled graph produced by ``build_graph``.

    Calling an instance with a question runs the compiled graph once and
    returns the ``final_answer`` entry of the resulting state.
    """

    def __init__(self):
        # Keep both the raw graph and its compiled form on the instance.
        graph = build_graph()
        self.graph = graph
        self.executor = graph.compile()

    def __call__(self, question):
        """Run the graph for *question* and return its final answer.

        Falls back to a fixed placeholder string when the resulting state
        has no ``final_answer`` key.
        """
        # The graph is invoked with a state dictionary, not the bare question.
        initial_state = {"question": question}
        final_state = self.executor.invoke(initial_state)
        return final_state.get("final_answer", "No answer generated.")

def run_and_submit_all(profile):
    """Run the agent on every fetched question and submit the answers.

    Args:
        profile: Identifies the logged-in user. Either a username string or
            an object exposing a ``username`` attribute. A falsy value, or
            an empty username, is treated as "not logged in".

    Returns:
        A ``(status_text, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with one row per processed question, or
        ``None`` when no question was processed.
    """
    # Created outside the ``try`` so rows collected before a failure can
    # still be shown to the user.
    results_log = []

    try:
        space_id = os.getenv("SPACE_ID")
        # Accept both a plain username and a profile-like object.
        username = getattr(profile, "username", profile) if profile else None
        if not username:
            return "❌ Please log in to Hugging Face.", None

        # NOTE(review): when SPACE_ID is unset this link contains "None";
        # confirm whether the scoring API tolerates that.
        agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
        agent = GaiaAgent()

        questions_data = fetch_questions()
        if not questions_data:
            return "❌ No questions retrieved from GAIA API.", None

        answers_payload = []
        for item in questions_data:
            # Assumes each item is a dict-like record -- TODO confirm
            # against fetch_questions().
            task_id = item.get("task_id")
            question = item.get("question")
            if task_id is None or question is None:
                # A malformed record must not abort the whole run.
                continue

            try:
                answer = agent(question)
            except Exception as exc:
                # Isolate per-question failures: record them for the user,
                # but do not submit an error string as an answer.
                results_log.append({
                    "Task ID": task_id,
                    "Question": question,
                    "Submitted Answer": f"AGENT ERROR: {exc}",
                })
                continue

            answers_payload.append(
                {"task_id": task_id, "submitted_answer": answer}
            )
            results_log.append({
                "Task ID": task_id,
                "Question": question,
                "Submitted Answer": answer,
            })

        if not answers_payload:
            table = pd.DataFrame(results_log) if results_log else None
            return "❌ Agent did not produce any answers to submit.", table

        result = submit_answers(username, agent_code, answers_payload)
        final_status = (
            "✅ Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score')}%\n"
            f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}\n"
            f"Message: {result.get('message', '')}"
        )
        return final_status, pd.DataFrame(results_log)

    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the
        # app, and keep whatever results were gathered before it.
        error_message = (
            f"❌ An error occurred:\n{str(e)}\n\n"
            f"Traceback:\n{traceback.format_exc()}"
        )
        table = pd.DataFrame(results_log) if results_log else None
        return error_message, table

def _run_for_logged_in_user(profile: Optional[gr.OAuthProfile]):
    """Adapter between Gradio's OAuth injection and ``run_and_submit_all``.

    Gradio fills a parameter annotated with ``gr.OAuthProfile`` itself, and
    passes ``None`` for the optional form when nobody is logged in. Listing
    the login button under ``inputs`` would instead hand the callback the
    button's label text, so no ``inputs`` are declared for the click.
    """
    return run_and_submit_all(profile.username if profile else None)


# Component creation order inside the context determines the page layout.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 GAIA LangGraph Agent")
    gr.Markdown("Log in and run your agent to evaluate on the GAIA benchmark.")

    login_button = gr.LoginButton()
    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status", lines=10)
    results_table = gr.DataFrame(label="Results")

    # No ``inputs``: the user's profile is injected through the type hint.
    run_button.click(
        fn=_run_for_logged_in_user,
        outputs=[status_output, results_table],
    )

# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch it.
if __name__ == "__main__":
    demo.launch()