# clients/groq_client.py
import json
import os
import random
import shutil
from uuid import uuid4

from dotenv import load_dotenv
from groq import Groq
from langchain_chroma import Chroma
from langchain_core.documents import Document
# CHANGED: Replaced HuggingFaceEndpointEmbeddings with HuggingFaceEmbeddings for local inference
from langchain_huggingface import HuggingFaceEmbeddings

from optimized_quiz import OPTIMIZED_QUESTIONS
load_dotenv() # load .env variables from root
# Config
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
DATA_PATH = "data.json"  # relative path; expects the app to be run from the project root
CHROMA_PATH = "chroma_db"
TEMPERATURE = float(os.getenv("G_TEMPERATURE", "0.7"))
MAX_TOKENS = int(os.getenv("G_MAX_TOKENS", "400"))
RETRIEVE_K = int(os.getenv("G_RETRIEVE_K", "3"))
TOP_P = float(os.getenv("G_TOP_P", "1.0"))
MAX_CONVERSATION_HISTORY = int(os.getenv("G_MAX_CONVERSATION_HISTORY", "5"))
SEARCH_TYPE = os.getenv("MMR", "mmr")  # retriever search type; "mmr" = Maximal Marginal Relevance
G_FETCH_K = int(os.getenv("G_FETCH_K", "20"))
LAMBDA_MULT = float(os.getenv("LAMBDA_MULT", "0.5"))
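
# Illustrative .env sketch (not shipped with this file): these are the keys the
# settings above read, shown with their in-code defaults. Only GROQ_API_KEY has
# no default and must be set.
#   GROQ_API_KEY=your_groq_api_key
#   G_TEMPERATURE=0.7
#   G_MAX_TOKENS=400
#   G_RETRIEVE_K=3
#   G_TOP_P=1.0
#   G_MAX_CONVERSATION_HISTORY=5
#   MMR=mmr
#   G_FETCH_K=20
#   LAMBDA_MULT=0.5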
class GroqClient:
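    """RAG assistant over Moses's profile data: documents from data.json are
    embedded locally, stored in Chroma, and answered through Groq-hosted chat
    models with rotation on failure."""
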
    def __init__(self):
        # Fail fast on a missing API key before doing any expensive embedding work
        if not GROQ_API_KEY:
            raise RuntimeError("GROQ_API_KEY not found in environment")
        self.client = Groq(api_key=GROQ_API_KEY)
        self.documents = self.load_json_data(DATA_PATH)
        if not self.documents:
            raise RuntimeError("No data loaded")
        self.vector_store = self.init_vector_store(self.documents)
        self.retriever = self.vector_store.as_retriever(
            search_type=SEARCH_TYPE,  # Maximal Marginal Relevance by default
            search_kwargs={
                "k": RETRIEVE_K,  # final number of docs to return
                "fetch_k": G_FETCH_K,  # docs fetched before the diversity filter
                "lambda_mult": LAMBDA_MULT,  # 1.0 = pure relevance, 0.0 = pure diversity
            },
        )
        self.SYSTEM_MESSAGE = (
"You are Moses's AI assistant, helpful, knowledgeable, professional, and friendly. "
"Use only the provided knowledge to answer questions about Moses's background, skills, projects, and experiences. "
"If knowledge is limited, give the most relevant answer possible without making things up. "
"Avoid repetitive openings such as 'I'm happy to...' or 'Sure, I'd be glad to...'. "
"Begin responses naturally, varying the first sentence. "
"Use third person when the question explicitly asks about Moses."
"IMPORTANT VOICE GUIDELINES:\n"
"Always use first person: 'I developed...', 'My experience includes...', 'I'm skilled in...'\n"
"Only use third person if someone explicitly asks 'Tell me about Moses as a person' or similar formal introductions\n"
"Speak as if you're having a direct conversation with the visitor\n"
"Be personable and authentic while staying professional\n"
"If a response is too brief, expand it contextually while keeping it accurate."
)
self.PROMPT_TEMPLATE = """
Use the following context to answer the question about Moses clearly and in detail.
Instructions:
- Avoid starting every response the same way; vary or skip the introduction unless it adds value.
- Keep answers concise and to the point.
- Use bullet points for lists.
- If the question is vague, ask for clarification.
- If the answer is short but the context allows, expand with relevant details.
- If unrelated or unanswerable from context, say:
"{fallback_response}"
- Give a short follow-up only when it is truly relevant.
Context:
{context}
Question:
{question}
Answer:
"""
self.GREETINGS_TRIGGERS = {
"hi",
"hello",
"hey",
"greetings",
"good morning",
"good afternoon",
"good evening",
"hi?",
"hello?",
"hey?",
"greetings?",
"good morning?",
"good afternoon?",
"good evening?",
}
self.GREETINGS = [
"Hi there! I'm Moses's brainy sidekick. Feel free to ask about his work, skills, projects, or even a bit about his personal life!",
"Hey! I'm here to help you discover Moses's skills, projects, and professional journey.",
"Hello! I can answer questions about Moses's work, experience, and what he's been up to. What would you like to know?",
"Hi! 👋 I'm like Siri, but for Moses 😄 Wanna know what he's good at or what he's been working on? Let's chat! 💬🔍",
"Greetings, human! 👽 I'm Moses's digital buddy. Ask me anything—skills, projects, secret talents... okay, maybe not too secret 🤫🚀",
"Sup! 😎 I'm the all-knowing assistant of Moses. Got questions about his work, skills, projects, or even fun facts about him? Ask about what he does, what he's built, or what makes him awesome.",
]
self.FALLBACK_RESPONSES = [
"Hmm, I don't have enough info to answer that right now. But feel free to ask about Moses's skills, projects, or professional experience!",
"That one's a bit outside my data zone! 😅 Try asking about Moses's work, what he's good at, or cool stuff he's built.",
"Oops! That question flew over my circuits 🤖💨. But hey, I can tell you all about Moses's projects, skills, or career highlights!",
"I couldn't find anything on that—yet! Let's try something else like Moses's background, his latest work, or what he's great at.",
"Either I need a software upgrade or that question's too mysterious 😜. Ask me about Moses's projects, skills, or even a fun fact!",
]
self.BLACKLIST = [
# SQL Injection keywords
"SELECT",
"DROP",
"INSERT",
"UPDATE",
"DELETE",
"ALTER",
"TRUNCATE",
"REPLACE",
"EXEC",
"EXECUTE",
"UNION",
"ALL",
"CREATE",
"GRANT",
"REVOKE",
"MERGE",
"--",
";",
"/*",
"*/",
"@@",
"@",
"CHAR(",
"NCHAR(",
"VARCHAR(",
"NVARCHAR(",
# XSS payload markers
"<script>",
"</script>",
"<img",
"onerror=",
"onload=",
"onclick=",
"onmouseover=",
"javascript:",
"vbscript:",
"data:text/html",
"<iframe",
"</iframe>",
"<object",
"<embed",
# Command injection patterns
"|",
"&",
"&&",
"||",
"$(",
"`",
"$(whoami)",
"$(ls)",
"$(cat",
"$(echo",
# Path traversal
"../",
"..\\",
"%2e%2e/",
"%2e%2e\\",
"%2e%2e%2f",
"%2e%2e%5c",
# Other suspicious patterns
"sleep(",
"benchmark(",
"load_file(",
"outfile",
"dumpfile",
]
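
    def _looks_malicious(self, text: str) -> bool:
        """Hedged sketch, not part of the original code path: BLACKLIST above
        is never referenced elsewhere in this file, so this helper shows one
        plausible way to apply it (e.g. from ask() before retrieval). Bare
        substring checks would flag benign prose, since entries like "ALL",
        "&", and "|" occur in ordinary questions; alphabetic terms are
        therefore matched on word boundaries, while symbolic payloads such as
        "../" or "<script>" are matched as raw substrings. The list itself
        would still need tuning before being enforced."""
        import re  # local import keeps the illustrative helper self-contained

        lowered = text.lower()
        for term in self.BLACKLIST:
            t = term.lower()
            if t.isalpha():
                # whole-word match for SQL-style keywords (SELECT, DROP, ...)
                if re.search(rf"\b{re.escape(t)}\b", lowered):
                    return True
            elif t in lowered:
                # raw substring match for operators and markup payloads
                return True
        return False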
def load_json_data(self, path):
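        """Load data.json into LangChain Documents.

        Expects a top-level JSON object with optional "qa" (question/answer
        pairs) and "chunks" (pre-split text) arrays; returns [] on any error.
        """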
try:
with open(path, "r", encoding="utf-8") as f:
data = json.load(f)
documents = []
if "qa" in data:
for item in data["qa"]:
text = f"Q: {item['question']}\nA: {item['answer']}"
documents.append(
Document(
page_content=text,
metadata={
"id": item.get("id", str(uuid4())),
"category": item.get("category", "QA"),
},
)
)
if "chunks" in data:
for item in data["chunks"]:
documents.append(
Document(
page_content=item["chunk"],
metadata={
"id": item.get("id", str(uuid4())),
"category": "Chunk",
},
)
)
return documents
except Exception as e:
print(f"Error loading JSON data: {e}")
return []
def init_vector_store(self, documents):
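        """Build a fresh Chroma collection over `documents` using local embeddings."""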
# CHANGED: Replaced online HuggingFaceEndpointEmbeddings with local HuggingFaceEmbeddings
# This downloads and stores the embedding model locally, eliminating API dependency
embeddings_model = HuggingFaceEmbeddings(
model_name="sentence-transformers/all-MiniLM-L6-v2",
model_kwargs={'device': 'cpu'}, # Force CPU usage to avoid GPU conflicts
encode_kwargs={'normalize_embeddings': True} # Normalize embeddings for better similarity search
)
        # Rebuild from scratch each start: wiping CHROMA_PATH avoids duplicate
        # inserts at the cost of re-embedding every document on startup
if os.path.exists(CHROMA_PATH):
shutil.rmtree(CHROMA_PATH)
uuids = [str(uuid4()) for _ in documents]
vector_store = Chroma(
collection_name="user_data",
embedding_function=embeddings_model,
persist_directory=CHROMA_PATH,
)
# CHANGED: This now processes embeddings locally instead of making API calls
vector_store.add_documents(documents=documents, ids=uuids)
return vector_store
def handle_unknown_query(self):
return random.choice(self.FALLBACK_RESPONSES)
def get_next_questions(self):
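        # Suggest three random follow-up prompts (assumes OPTIMIZED_QUESTIONS has at least three entries)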
return random.sample(OPTIMIZED_QUESTIONS, 3)
# ---------------MAIN-----------------
def ask(self, raw_query: str) -> str:
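        """Answer `raw_query` via RAG over the Chroma store.

        Empty input and greetings are handled up front; otherwise context is
        retrieved, the prompt is built, and several Groq models are tried in turn.
        """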
        q = (raw_query or "").strip()
        if not q:
            return random.choice(self.FALLBACK_RESPONSES)
if q.lower() in self.GREETINGS_TRIGGERS:
return random.choice(self.GREETINGS)
try:
docs = self.retriever.invoke(q)
except Exception as e:
return f"Error retrieving documents: {e}"
if not docs:
return random.choice(self.FALLBACK_RESPONSES)
context = "\n".join([d.page_content for d in docs])
fallback = self.handle_unknown_query()
prompt = self.PROMPT_TEMPLATE.format(
context=context, question=q, fallback_response=fallback
)
        messages = [
            {"role": "system", "content": self.SYSTEM_MESSAGE},
            {"role": "user", "content": prompt},
        ]
        # Try several models in random order; fall back to the next one on
        # rate limits or empty responses
models_to_try = [
"compound-beta-mini",
"llama-3.1-8b-instant",
"gemma2-9b-it",
]
random.shuffle(models_to_try)
for model in models_to_try:
try:
completion = self.client.chat.completions.create(
model=model,
messages=messages,
temperature=TEMPERATURE,
max_completion_tokens=MAX_TOKENS,
top_p=TOP_P,
stream=False,
)
response = completion.choices[0].message.content
if response and response.strip():
return response.strip()
else:
continue # Try next model
except Exception as e:
# Check if it's a rate limit error
if "rate_limit_exceeded" in str(e) or "429" in str(e):
print(f"Rate limit hit for model {model}, trying fallback...")
continue
else:
# For other errors, return immediately
return f"Error while calling LLM: {e}"
# If all models fail
return "I'm temporarily experiencing high demand. Please try again in a few minutes or rephrase your question."