"""Gradio Space that runs a Mistral-7B agent (via the HF Inference API) over the
Agents-Course evaluation questions and submits the answers for scoring."""

import json
import os
import traceback
from typing import Optional

import gradio as gr
import pandas as pd
import requests
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# ──────────────── Load Environment ────────────────
load_dotenv()
hf_token = os.getenv("HF_TOKEN")

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.1"

# ──────────────── Load Inference Client ────────────────
try:
    client = InferenceClient(model=MODEL_ID, token=hf_token)
except Exception as e:
    # Chain the original cause so the traceback shows what actually failed.
    raise RuntimeError(f"Model loading failed: {e}") from e


# ──────────────── Agent Class ────────────────
class MistralAgent:
    """Thin wrapper around the Inference API client for answering one question."""

    def __init__(self):
        print("✅ MistralAgent (Inference API) initialized.")

    def __call__(self, question: str) -> str:
        """Generate an answer for *question*.

        Never raises: inference failures are returned as an "LLM Error: ..."
        string so the evaluation loop can keep going.
        """
        try:
            prompt = f"[INST] {question.strip()} [/INST]"
            # Greedy decoding via do_sample=False. Do NOT pass temperature=0.0
            # alongside it: TGI-backed endpoints reject a temperature of zero
            # ("temperature must be strictly positive") and the value is
            # ignored for greedy decoding anyway.
            response = client.text_generation(
                prompt,
                max_new_tokens=100,
                do_sample=False,
            )
            return response.strip()
        except Exception as e:
            return f"LLM Error: {e}"


# ──────────────── API Helpers ────────────────
def get_all_questions(api_url: str) -> list[dict]:
    """Fetch the full question list from the scoring API.

    Raises requests.HTTPError on a non-2xx response.
    """
    resp = requests.get(f"{api_url}/questions", timeout=15)
    resp.raise_for_status()
    return resp.json()


def submit_answers(api_url: str, username: str, code_link: str, answers: list[dict]) -> dict:
    """POST the answers payload to the scoring API and return its JSON result.

    Raises requests.HTTPError on a non-2xx response.
    """
    payload = {
        "username": username,
        "agent_code": code_link,
        "answers": answers,
    }
    resp = requests.post(f"{api_url}/submit", json=payload, timeout=60)
    resp.raise_for_status()
    return resp.json()


# ──────────────── Gradio Evaluation Logic ────────────────
def run_and_submit_all(profile: Optional[gr.OAuthProfile]):
    """Run the agent on every question and submit all answers.

    Gradio injects *profile* automatically when the user is logged in via the
    LoginButton. Returns a (status message, results DataFrame) pair for the UI;
    the DataFrame is None when the run aborts before any question is answered.
    """
    if not profile:
        return "❌ Please log in to Hugging Face using the button above.", None

    username = profile.username.strip()
    space_id = os.getenv("SPACE_ID", "")
    code_link = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""

    try:
        agent = MistralAgent()
    except Exception as e:
        return f"❌ Error initializing agent: {e}", None

    try:
        questions_data = get_all_questions(DEFAULT_API_URL)
    except Exception as e:
        return f"❌ Failed to load questions: {e}", None

    # Defensive: the API is expected to return a non-empty list of dicts.
    if not isinstance(questions_data, list) or not questions_data:
        return "❌ No questions returned by the server.", None

    answers_payload = []
    results_log = []
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question", "")
        if not task_id or not question_text:
            continue  # skip malformed entries
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            submitted_answer = f"AGENT ERROR: {e}"
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append(
            {
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": submitted_answer,
            }
        )

    if not answers_payload:
        return "❌ No answers submitted.", pd.DataFrame(results_log)

    try:
        result_data = submit_answers(DEFAULT_API_URL, username, code_link, answers_payload)
    except requests.exceptions.HTTPError as e:
        # Surface the server's error detail when it responds with JSON.
        try:
            detail = e.response.json().get("detail", e.response.text)
        except Exception:
            detail = e.response.text
        return (
            f"❌ Submission Failed: HTTP {e.response.status_code}. Detail: {detail}",
            pd.DataFrame(results_log),
        )
    except Exception as e:
        return f"❌ Submission Error: {e}", pd.DataFrame(results_log)

    score = result_data.get("score", "N/A")
    correct_count = result_data.get("correct_count", "?")
    total = result_data.get("total_attempted", "?")
    message = result_data.get("message", "")
    final_status = (
        f"✅ Submission Successful!\n"
        f"User: {username}\n"
        f"Score: {score}% ({correct_count}/{total} correct)\n"
        f"Message: {message}"
    )
    return final_status, pd.DataFrame(results_log)


# ──────────────── Gradio UI ────────────────
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Mistral-7B Agent Evaluation (via Inference API)")
    gr.Markdown(
        """
        **Instructions:**
        1. Copy this Space and define your own agent logic.
        2. Log in with your Hugging Face account.
        3. Click “Run Evaluation & Submit All Answers” to test and submit.
        """
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Status", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Agent Answers", wrap=True)
    # No explicit inputs: Gradio passes the OAuth profile to the handler.
    run_button.click(fn=run_and_submit_all, inputs=[], outputs=[status_output, results_table])

if __name__ == "__main__":
    print("Launching Gradio Interface...")
    demo.launch(debug=True, server_name="0.0.0.0", server_port=7860)