import os
from smolagents import CodeAgent, ToolCallingAgent
from smolagents import OpenAIServerModel
from tools.fetch import fetch_webpage
from tools.yttranscript import get_youtube_transcript, get_youtube_title_description
import myprompts
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
# --- Basic Agent Definition ---
# Basic model wrapper for local inference with debug info
class BasicAgent:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        self.device = model.device if hasattr(model, "device") else "cpu"
        print(f"Model device: {self.device}")

    def _extract_prompt(self, prompt):
        if isinstance(prompt, str):
            return prompt
        elif isinstance(prompt, list):
            # Convert a list of ChatMessages or dicts to plain text
            return "\n".join(
                msg.content if hasattr(msg, "content") else msg.get("content", str(msg))
                for msg in prompt
            )
        else:
            return str(prompt)
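
    # Example: a chat-style list such as [{"role": "user", "content": "Hi"}]
    # is flattened by _extract_prompt into the plain string "Hi".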
    def generate(self, prompt, max_new_tokens=512):
        try:
            print("\n[DEBUG] Raw prompt input:", prompt)
            text_prompt = self._extract_prompt(prompt)
            print(
                "[DEBUG] Extracted prompt text:",
                (text_prompt[:200] + "...") if len(text_prompt) > 200 else text_prompt,
            )
            inputs = self.tokenizer(text_prompt, return_tensors="pt").to(self.device)
            input_ids = inputs["input_ids"]
            print("[DEBUG] Tokenized input shape:", input_ids.shape)
            with torch.no_grad():
                output = self.model.generate(
                    input_ids=input_ids,
                    attention_mask=inputs["attention_mask"],
                    do_sample=True,
                    temperature=0.3,
                    min_p=0.15,
                    repetition_penalty=1.05,
                    max_new_tokens=max_new_tokens,
                    pad_token_id=self.tokenizer.eos_token_id,
                )
            # Strip the prompt tokens so only the newly generated text is decoded
            new_tokens = output[0][len(input_ids[0]):]
            decoded = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
            print("[DEBUG] Decoded output:", decoded.strip())
            return decoded.strip()
        except Exception as e:
            print(f"[ERROR] Generation failed: {e}")
            return f"Error generating response: {e}"

    def __call__(self, prompt, max_new_tokens=512):
        return self.generate(prompt, max_new_tokens)
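
# Optional helper (a sketch, not wired into BasicAgent above): if the checkpoint
# ships a chat template, formatting prompts with tokenizer.apply_chat_template is
# usually a better fit for instruct-tuned models than raw text concatenation.
def build_chat_prompt(tokenizer, user_message):
    messages = [{"role": "user", "content": user_message}]
    # add_generation_prompt appends the assistant turn header so generation starts cleanly
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )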
# Load the model and tokenizer and wrap them in a BasicAgent
def load_model(model_id="LiquidAI/LFM2-1.2B"):
    print(f"Loading model: {model_id}")
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return BasicAgent(model, tokenizer)
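
# Note: device_map="auto" with bfloat16 assumes an accelerator (or a CPU with
# bf16 support) is available; on older CPU-only machines torch.float32 is the
# safer dtype choice.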
# Run a minimal test
if __name__ == "__main__":
    agent = load_model()

    # Example prompt
    prompt = "What is the capital of France?"
    print("\n[TEST] Asking a simple question...")
    response = agent(prompt)
    print("\nFinal Answer:", response)