Mettaton

Sleeping

App Files Files Community

Mettaton / app.py

DragonProgrammer

Update app.py

46292e7 verified about 2 months ago

raw

history blame contribute delete

18.2 kB

	import os
	import gradio as gr
	import pandas as pd
	from bs4 import BeautifulSoup
	import datetime
	import pytz
	import math
	import re
	import requests
	import traceback
	import sys
	import torch
	import transformers
	from torch.cuda import memory_allocated, memory_reserved

	# --- Transformers Imports ---
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# --- LangChain Imports (Core) ---
	from langchain_huggingface import HuggingFacePipeline
	from langchain_core.prompts import PromptTemplate
	from langchain.tools import Tool
	from langchain.memory import ConversationBufferWindowMemory
	from langchain.llms.base import LLM

	print(f"--- Using transformers version: {transformers.__version__} ---")

	# --- Constants ---
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


	# --- Tool Definitions (LangChain Style) ---
	def get_current_time_in_timezone_func(timezone: str) -> str:
	"""A tool that fetches the current local time in a specified IANA timezone. Always use this tool for questions about the current time. Input should be a valid timezone string (e.g., 'America/New_York', 'Europe/London')."""
	print(f"--- Tool: Executing get_current_time_in_timezone for: {timezone} ---")
	try:
	tz = pytz.timezone(timezone)
	local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S %Z%z")
	return f"The current local time in {timezone} is: {local_time}"
	except pytz.exceptions.UnknownTimeZoneError:
	return f"Error: Unknown timezone '{timezone}'. Please use a valid IANA timezone name."
	except Exception as e:
	return f"Error fetching time for timezone '{timezone}': {str(e)}"

	def safe_calculator_func(expression: str) -> str:
	"""A tool for evaluating simple mathematical expressions. Use this tool only for calculations involving numbers, +, -, *, /, %, parentheses, and the math functions: sqrt, pow. Do not use it to run other code."""
	print(f"--- Tool: Executing safe_calculator with expression: {expression} ---")
	try:
	allowed_names = {"sqrt": math.sqrt, "pow": math.pow, "pi": math.pi}
	# Safely evaluate the expression
	if not re.match(r"^[0-9+\-*/().\s,math.sqrtpowpi]+$", expression):
	raise ValueError("Invalid characters in expression")

	result = eval(expression, {"__builtins__": {}}, allowed_names)
	return str(result)
	except Exception as e:
	print(f"Error during calculation for '{expression}': {e}")
	return f"Error calculating '{expression}': Invalid expression or calculation error ({e})."

	class SlicedLLM(LLM):
	"""
	Light wrapper around any LangChain LLM (we'll use the HuggingFacePipeline wrapper).
	Responsibilities:
	- Call the inner LLM
	- Extract text robustly from different return shapes
	- Truncate to `max_chars` from the end (keeps the most recent reasoning)
	- Strip instruction echoing by keeping from the last 'Thought:' if present
	"""
	def __init__(self, inner_llm, max_chars: int = 2048, **kwargs):
	self.inner_llm = inner_llm
	self.max_chars = int(max_chars)
	# required for LangChain LLM subclasses
	self.max_retries = kwargs.get("max_retries", 1)

	@property
	def _llm_type(self) -> str:
	return "sliced-llm"

	def _call(self, prompt: str, stop=None) -> str:
	"""
	Core call entrypoint used by LangChain. We call the inner LLM and then post-process.
	"""
	# 1) Call inner LLM (it may expose _call or be callable)
	raw = None
	# inner may be a LangChain LLM (with _call) or a callable pipeline
	if hasattr(self.inner_llm, "_call"):
	raw = self.inner_llm._call(prompt, stop=stop)
	else:
	# fallback - call and try to extract text
	# Many pipeline wrappers accept a string and return text or list
	raw = self.inner_llm(prompt)

	# 2) Extract text from common return shapes
	text = self._extract_text(raw)

	# 3) Attempt to remove repeated instruction blocks by finding last 'Thought:' anchor
	# We keep text from the last "Thought:" onward if that appears in the output.
	# This removes prompt-echoed instruction blocks that often appear earlier in the string.
	last_thought_idx = text.rfind("\nThought:")
	if last_thought_idx >= 0:
	# keep from the last Thought: (include the marker so parser sees it)
	text = text[last_thought_idx + 1 :] # +1 to keep leading newline trimmed

	# 4) Truncate to keep the most recent reasoning / final answer
	if len(text) > self.max_chars:
	text = text[-self.max_chars :]

	# 5) Strip leading/trailing whitespace
	return text.strip()

	def _extract_text(self, raw):
	"""
	Handle possible return formats:
	- plain str
	- list/dict results from HF pipeline
	- objects exposing .content or ['generated_text']
	"""
	# Direct string
	if isinstance(raw, str):
	return raw

	# If it's a list (transformers pipeline may return list of dicts)
	if isinstance(raw, (list, tuple)) and len(raw) > 0:
	first = raw[0]
	if isinstance(first, dict):
	# common keys: 'generated_text', 'text'
	for k in ("generated_text", "text", "output_text"):
	if k in first:
	return str(first[k])
	# else stringify the dict
	return str(first)
	else:
	return str(first)

	# If it's a dict with 'generated_text' etc.
	if isinstance(raw, dict):
	for k in ("generated_text", "text", "output_text"):
	if k in raw:
	return str(raw[k])
	# fallback to string repr
	return str(raw)

	# Last resort: string conversion
	return str(raw)

	def _identifying_params(self):
	return {"inner": getattr(self.inner_llm, "_llm_type", None), "max_chars": self.max_chars}

	# --- Completely rewritten LangChainAgentWrapper (drop-in) ---
	class LangChainAgentWrapper:
	"""
	Rewritten, robust LangChain agent wrapper:
	- loads Gemma model (model_id variable)
	- wraps HF pipeline into HuggingFacePipeline (LangChain)
	- wraps that into SlicedLLM to truncate / clean model outputs
	- builds ReAct prompt (contains {tools} and {tool_names})
	- creates agent with create_react_agent + AgentExecutor
	"""

	def __init__(
	self,
	model_id: str = "google/gemma-2b-it",
	max_new_tokens: int = 96,
	max_chars: int = 2048,
	max_iterations: int = 2,
	):
	print("Initializing LangChainAgentWrapper...")
	try:
	# Lazy/delayed imports
	from langchain.agents import AgentExecutor, create_react_agent
	from langchain_community.tools import DuckDuckGoSearchRun

	# --- Tokenizer & Model ---
	print(f"Loading tokenizer for: {model_id}")
	tokenizer = AutoTokenizer.from_pretrained(model_id)

	print(f"Loading model: {model_id}")
	model = AutoModelForCausalLM.from_pretrained(
	model_id,
	torch_dtype=torch.bfloat16,
	device_map="auto",
	offload_folder="offload",
	)
	print("Model loaded successfully.")
	print(f"Allocated: {memory_allocated()/1e9:.2f} GB \| Reserved: {memory_reserved()/1e9:.2f} GB")

	# --- HF pipeline (transformers) with safe defaults ---
	llm_pipeline = transformers.pipeline(
	"text-generation",
	model=model,
	tokenizer=tokenizer,
	max_new_tokens=max_new_tokens,
	return_full_text=False,
	pad_token_id=tokenizer.eos_token_id,
	eos_token_id=tokenizer.eos_token_id,
	)
	print("Transformers pipeline created successfully.")

	# --- Wrap pipeline into LangChain HuggingFacePipeline LLM ---
	base_lc_llm = HuggingFacePipeline(pipeline=llm_pipeline)
	# --- Wrap that LLM into our slicer to keep outputs trimmed and to strip instruction echoes ---
	self.llm = SlicedLLM(base_lc_llm, max_chars=max_chars)

	print("SlicedLLM wrapper created successfully.")

	# --- Tools ---
	print("Defining tools...")
	search_tool = DuckDuckGoSearchRun(
	name="web_search",
	description="Web search via DuckDuckGo for up-to-date facts/events."
	)

	self.tools = [
	Tool(
	name="get_current_time_in_timezone",
	func=get_current_time_in_timezone_func,
	description=get_current_time_in_timezone_func.__doc__
	),
	search_tool,
	Tool(
	name="safe_calculator",
	func=safe_calculator_func,
	description=safe_calculator_func.__doc__
	),
	]
	print(f"Tools prepared: {[t.name for t in self.tools]}")

	# --- ReAct prompt (must contain {tools} and {tool_names}) ---
	react_prompt = PromptTemplate(
	input_variables=["tools", "tool_names", "agent_scratchpad"],
	template="""
	DO NOT REPEAT OR PARAPHRASE ANY PART OF THIS PROMPT.

	You are an assistant that strictly follows the ReAct format.

	You can use these tools:
	{tools}

	Valid tool names: {tool_names}

	When responding, follow this exact grammar and include nothing else:

	Thought: <brief reasoning>
	Action: <one of {tool_names} OR "none">
	Action Input: <input for the action>

	(If you choose an action other than "none", the system will insert an Observation before you continue.)
	If Action is "none", finish by outputting:
	Final Answer: <short direct answer>

	{agent_scratchpad}
	Thought:
	""",
	)

	# --- Create agent + executor ---
	print("Creating agent...")
	agent = create_react_agent(self.llm, self.tools, react_prompt)

	self.agent_executor = AgentExecutor(
	agent=agent,
	tools=self.tools,
	verbose=True,
	handle_parsing_errors=True,
	max_iterations=max_iterations,
	)
	print("LangChain agent created successfully.")

	except Exception as e:
	print(f"CRITICAL ERROR: Failed to initialize LangChain agent: {e}")
	traceback.print_exc()
	raise RuntimeError(f"LangChain agent initialization failed: {e}") from e

	def __call__(self, question: str) -> str:
	"""
	Run the agent on a single question.
	We rely on AgentExecutor to manage the ReAct loops.
	"""
	print(f"\n--- LangChainAgentWrapper received question: {question[:140]}... ---")
	try:
	# AgentExecutor expects {"input": question}
	response = self.agent_executor.invoke({"input": question})
	return response.get("output", "No output found.")
	except Exception as e:
	print(f"ERROR: LangChain agent execution failed: {e}")
	traceback.print_exc()
	# Return an informative string so the outer code can still submit something
	return f"Agent Error: Failed to process the question. Details: {e}"

	def __call__(self, question: str) -> str:
	"""
	Run the agent on a single question. We rely on the AgentExecutor to manage
	ReAct loops and tool invocations. Exceptions are caught and printed; the
	returned string will contain error details in that case.
	"""
	print(f"\n--- LangChainAgentWrapper received question: {question[:100]}... ---")
	try:
	# AgentExecutor expects a dict with the input under the key "input"
	response = self.agent_executor.invoke({"input": question})
	# The LangChain executor returns a complex structure; .get("output") was used previously.
	# Returning response.get("output", "No output found.") preserves prior behaviour.
	return response.get("output", "No output found.")
	except Exception as e:
	print(f"ERROR: LangChain agent execution failed: {e}")
	traceback.print_exc()
	return f"Agent Error: Failed to process the question. Details: {e}"


	# --- Main Evaluation Logic ---
	def run_and_submit_all(profile: gr.OAuthProfile \| None):
	space_id = os.getenv("SPACE_ID")
	if profile:
	username= f"{profile.username}"
	print(f"User logged in: {username}")
	else:
	print("User not logged in.")
	return "Please Login to Hugging Face with the button.", None

	api_url = DEFAULT_API_URL
	questions_url = f"{api_url}/questions"
	submit_url = f"{api_url}/submit"

	try:
	agent = LangChainAgentWrapper()
	except Exception as e:
	print(f"Error instantiating agent: {e}")
	return f"Error initializing agent: {e}", None

	agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
	print(agent_code)

	print(f"Fetching questions from: {questions_url}")
	try:
	response = requests.get(questions_url, timeout=15)
	response.raise_for_status()
	questions_data = response.json()
	if not questions_data:
	print("Fetched questions list is empty.")
	return "Fetched questions list is empty or invalid format.", None
	print(f"Fetched {len(questions_data)} questions.")
	except Exception as e:
	print(f"An unexpected error occurred fetching questions: {e}")
	traceback.print_exc()
	return f"An unexpected error occurred fetching questions: {e}", None

	results_log = []
	answers_payload = []
	print(f"Running agent on {len(questions_data)} questions...")

	for item in questions_data:
	task_id = item.get("task_id")
	question_text = item.get("question")
	if not task_id or question_text is None:
	print(f"Skipping item with missing task_id or question: {item}")
	continue

	try:
	submitted_answer = agent(question_text)
	answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
	results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
	except Exception as e:
	print(f"Error running agent on task {task_id}: {e}")
	traceback.print_exc()
	results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

	if not answers_payload:
	print("Agent did not produce any answers to submit.")
	return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

	submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
	status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
	print(status_update)
	print(f"Submitting {len(answers_payload)} answers to: {submit_url}")

	try:
	response = requests.post(submit_url, json=submission_data, timeout=60)
	response.raise_for_status()
	result_data = response.json()
	final_status = (
	f"Submission Successful!\n"
	f"User: {result_data.get('username')}\n"
	f"Overall Score: {result_data.get('score', 'N/A')}% "
	f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
	f"Message: {result_data.get('message', 'No message received.')}"
	)
	print("Submission successful.")
	results_df = pd.DataFrame(results_log)
	return final_status, results_df
	except Exception as e:
	status_message = f"An unexpected error occurred during submission: {e}"
	print(status_message)
	traceback.print_exc()
	results_df = pd.DataFrame(results_log)
	return status_message, results_df

	# --- Build Gradio Interface using Blocks ---
	with gr.Blocks() as demo:
	gr.Markdown("# Basic Agent Evaluation Runner")
	gr.Markdown(
	"""
	Instructions:

	1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
	2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
	3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
	"""
	)
	gr.LoginButton()
	run_button = gr.Button("Run Evaluation & Submit All Answers")
	status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
	results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

	run_button.click(
	fn=run_and_submit_all,
	outputs=[status_output, results_table]
	)

	if __name__ == "__main__":
	print("\n" + "-"30 + " App Starting " + "-"30)
	space_host_startup = os.getenv("SPACE_HOST")
	space_id_startup = os.getenv("SPACE_ID")

	if space_host_startup:
	print(f"✅ SPACE_HOST found: {space_host_startup}")
	print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
	else:
	print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

	if space_id_startup:
	print(f"✅ SPACE_ID found: {space_id_startup}")
	print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
	print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
	else:
	print("ℹ️ SPACE_ID in environment variable not found (running locally?).")

	print("-"*(60 + len(" App Starting ")) + "\n")
	print("Launching Gradio Interface for Basic Agent Evaluation...")

	demo.launch(debug=True, share=False)