File size: 5,214 Bytes
10e9b7d
0d7ddf0
eccf8e4
0d7ddf0
 
3c4371f
0d7ddf0
 
f63b186
10e9b7d
f63b186
0d7ddf0
345402c
3db6293
f63b186
e80aab9
f63b186
345402c
f63b186
345402c
 
0d7ddf0
f63b186
 
31243f4
f63b186
3c4371f
0d7ddf0
 
f63b186
 
 
0d7ddf0
 
 
f63b186
0d7ddf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f63b186
 
345402c
0d7ddf0
345402c
3c4371f
0d7ddf0
 
e80aab9
31243f4
f63b186
31243f4
0d7ddf0
 
eccf8e4
0d7ddf0
7d65c66
0d7ddf0
e80aab9
7d65c66
0d7ddf0
345402c
31243f4
 
0d7ddf0
 
31243f4
 
7d65c66
31243f4
0d7ddf0
 
 
31243f4
 
0d7ddf0
e80aab9
 
0d7ddf0
e80aab9
 
0d7ddf0
 
 
 
7d65c66
0d7ddf0
 
 
 
 
 
 
 
 
 
 
 
 
 
e80aab9
f63b186
e80aab9
f63b186
0ee0419
e514fd7
 
 
0d7ddf0
 
 
e514fd7
e80aab9
7e4a06b
31243f4
0d7ddf0
 
e80aab9
345402c
e80aab9
 
0d7ddf0
235b2f6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import os
import json
import requests
import traceback
import gradio as gr
import pandas as pd
from typing import Optional
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# ────────────────  Load Environment ────────────────
load_dotenv()  # pull variables from a local .env file into os.environ (no-op if absent)
hf_token = os.getenv("HF_TOKEN")  # Hugging Face API token; None if not configured
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"  # scoring service base URL
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.1"  # model served through the HF Inference API

# ────────────────  Load Inference Client ────────────────
# Build the shared Inference API client once at import time; any failure
# (bad token, unknown model, etc.) aborts startup with an explicit error
# instead of failing later on the first request.
try:
    client = InferenceClient(model=MODEL_ID, token=hf_token)
except Exception as e:
    raise RuntimeError(f"Model loading failed: {e}")

# ────────────────  Agent Class ────────────────
class MistralAgent:
    """Callable wrapper around the module-level Inference API client.

    Invoking an instance with a question string returns the model's reply,
    or an ``"LLM Error: ..."`` string when generation fails for any reason.
    """

    def __init__(self):
        # Announce readiness; no state is held — the client is a module global.
        print("βœ… MistralAgent (Inference API) initialized.")

    def __call__(self, question: str) -> str:
        try:
            # Mistral-Instruct expects the [INST] ... [/INST] chat template.
            wrapped = f"[INST] {question.strip()} [/INST]"
            reply = client.text_generation(
                wrapped,
                max_new_tokens=100,
                temperature=0.0,
                do_sample=False,
            )
        except Exception as exc:
            # Degrade to an error string so the evaluation loop keeps going.
            return f"LLM Error: {exc}"
        else:
            return reply.strip()

# ────────────────  API Helpers ────────────────
def get_all_questions(api_url: str) -> list[dict]:
    """Fetch the full question list from the scoring service.

    Raises ``requests.HTTPError`` on a non-2xx response.
    """
    response = requests.get(f"{api_url}/questions", timeout=15)
    response.raise_for_status()
    return response.json()

def submit_answers(api_url: str, username: str, code_link: str, answers: list[dict]) -> dict:
    """POST the collected answers to the scoring endpoint and return its JSON reply.

    Raises ``requests.HTTPError`` on a non-2xx response.
    """
    body = {"username": username, "agent_code": code_link, "answers": answers}
    response = requests.post(f"{api_url}/submit", json=body, timeout=60)
    response.raise_for_status()
    return response.json()

# ────────────────  Gradio Evaluation Logic ────────────────
def run_and_submit_all(profile: Optional[gr.OAuthProfile]):
    """Run the agent over every question and submit all answers for scoring.

    Parameters
    ----------
    profile:
        The logged-in Hugging Face profile injected by Gradio's OAuth flow,
        or None when the user has not logged in.

    Returns
    -------
    tuple
        A human-readable status string and a pandas DataFrame of per-question
        results (None for the DataFrame when we bail out before processing).
    """
    if not profile:
        return "❌ Please log in to Hugging Face using the button above.", None
    username = profile.username.strip()

    # Link to this Space's source tree so the scoring service can inspect the agent.
    space_id = os.getenv("SPACE_ID", "")
    code_link = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""

    try:
        agent = MistralAgent()
    except Exception as e:
        return f"❌ Error initializing agent: {e}", None

    try:
        questions_data = get_all_questions(DEFAULT_API_URL)
    except Exception as e:
        return f"❌ Failed to load questions: {e}", None

    answers_payload = []
    results_log = []

    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question", "")
        if not task_id or not question_text:
            continue  # skip malformed entries rather than submitting blanks
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # The agent normally returns its own error strings; this is a
            # last-resort net so one bad question cannot abort the whole run.
            submitted_answer = f"AGENT ERROR: {e}"
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    if not answers_payload:
        return "❌ No answers submitted.", pd.DataFrame(results_log)

    try:
        result_data = submit_answers(DEFAULT_API_URL, username, code_link, answers_payload)
    except requests.exceptions.HTTPError as e:
        # Fix: e.response can be None (an HTTPError raised without an attached
        # response object); the original code would crash with AttributeError
        # inside this very error handler.
        if e.response is None:
            return f"❌ Submission Failed: {e}", pd.DataFrame(results_log)
        try:
            detail = e.response.json().get("detail", e.response.text)
        except Exception:
            detail = e.response.text  # body was not JSON; fall back to raw text
        return f"❌ Submission Failed: HTTP {e.response.status_code}. Detail: {detail}", pd.DataFrame(results_log)
    except Exception as e:
        return f"❌ Submission Error: {e}", pd.DataFrame(results_log)

    # Defensive .get() calls: the scoring API's reply schema is not guaranteed.
    score = result_data.get("score", "N/A")
    correct_count = result_data.get("correct_count", "?")
    total = result_data.get("total_attempted", "?")
    message = result_data.get("message", "")

    final_status = (
        f"βœ… Submission Successful!\n"
        f"User: {username}\n"
        f"Score: {score}% ({correct_count}/{total} correct)\n"
        f"Message: {message}"
    )
    return final_status, pd.DataFrame(results_log)

# ────────────────  Gradio UI ────────────────
# Declarative UI: a login button, one action button, a status textbox, and a
# results table. The OAuth profile is injected into run_and_submit_all by
# Gradio automatically (via the gr.OAuthProfile type annotation), which is
# why inputs=[] is correct here despite the function taking a parameter.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Mistral-7B Agent Evaluation (via Inference API)")
    gr.Markdown(
        """
        **Instructions:**

        1. Copy this Space and define your own agent logic.
        2. Log in with your Hugging Face account.
        3. Click β€œRun Evaluation & Submit All Answers” to test and submit.
        """
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Status", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Agent Answers", wrap=True)

    run_button.click(fn=run_and_submit_all, inputs=[], outputs=[status_output, results_table])

# Entry point: serve on all interfaces (required inside a HF Space container)
# on the Space's conventional port 7860, with debug tracebacks enabled.
if __name__ == "__main__":
    print("Launching Gradio Interface...")
    demo.launch(debug=True, server_name="0.0.0.0", server_port=7860)