# clients/groq_client.py
import json
import os
import random
import shutil
from uuid import uuid4

from dotenv import load_dotenv
from groq import Groq
from langchain_chroma import Chroma
from langchain_core.documents import Document
# CHANGED: Replaced HuggingFaceEndpointEmbeddings with HuggingFaceEmbeddings for local inference
from langchain_huggingface import HuggingFaceEmbeddings

from optimized_quiz import OPTIMIZED_QUESTIONS
load_dotenv() # load .env variables from root
# Config
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
DATA_PATH = "data.json"  # relative path; expects the app to be run from the project root
CHROMA_PATH = "chroma_db"
TEMPERATURE = float(os.getenv("G_TEMPERATURE", "0.7"))
MAX_TOKENS = int(os.getenv("G_MAX_TOKENS", "400"))
RETRIEVE_K = int(os.getenv("G_RETRIEVE_K", "3"))
TOP_P = float(os.getenv("G_TOP_P", "1.0"))
MAX_CONVERSATION_HISTORY = int(os.getenv("G_MAX_CONVERSATION_HISTORY", "5"))
SEARCH_TYPE = os.getenv("MMR", "mmr")  # retriever search type; "mmr" = Maximal Marginal Relevance
G_FETCH_K = int(os.getenv("G_FETCH_K", "20"))
LAMBDA_MULT = float(os.getenv("LAMBDA_MULT", "0.5"))
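
# Illustrative .env sketch (not shipped with this file): these are the keys the
# settings above read, shown with their in-code defaults. Only GROQ_API_KEY has
# no default and must be set.
#   GROQ_API_KEY=your_groq_api_key
#   G_TEMPERATURE=0.7
#   G_MAX_TOKENS=400
#   G_RETRIEVE_K=3
#   G_TOP_P=1.0
#   G_MAX_CONVERSATION_HISTORY=5
#   MMR=mmr
#   G_FETCH_K=20
#   LAMBDA_MULT=0.5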
class GroqClient:
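    """RAG assistant over Moses's profile data: documents from data.json are
    embedded locally, stored in Chroma, and answered through Groq-hosted chat
    models with rotation on failure."""
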
    def __init__(self):
        # Fail fast on a missing API key before doing any expensive embedding work
        if not GROQ_API_KEY:
            raise RuntimeError("GROQ_API_KEY not found in environment")
        self.client = Groq(api_key=GROQ_API_KEY)
        self.documents = self.load_json_data(DATA_PATH)
        if not self.documents:
            raise RuntimeError("No data loaded")
        self.vector_store = self.init_vector_store(self.documents)
        self.retriever = self.vector_store.as_retriever(
            search_type=SEARCH_TYPE,  # Maximal Marginal Relevance by default
            search_kwargs={
                "k": RETRIEVE_K,  # final number of docs to return
                "fetch_k": G_FETCH_K,  # docs fetched before the diversity filter
                "lambda_mult": LAMBDA_MULT,  # 1.0 = pure relevance, 0.0 = pure diversity
            },
        )
        self.SYSTEM_MESSAGE = (
"You are Moses's AI assistant, helpful, knowledgeable, professional, and friendly. "
"Use only the provided knowledge to answer questions about Moses's background, skills, projects, and experiences. "
"If knowledge is limited, give the most relevant answer possible without making things up. "
"Avoid repetitive openings such as 'I'm happy to...' or 'Sure, I'd be glad to...'. "
"Begin responses naturally, varying the first sentence. "
"Use third person when the question explicitly asks about Moses."
"IMPORTANT VOICE GUIDELINES:\n"
"Always use first person: 'I developed...', 'My experience includes...', 'I'm skilled in...'\n"
"Only use third person if someone explicitly asks 'Tell me about Moses as a person' or similar formal introductions\n"
"Speak as if you're having a direct conversation with the visitor\n"
"Be personable and authentic while staying professional\n"
"If a response is too brief, expand it contextually while keeping it accurate."
)
self.PROMPT_TEMPLATE = """
Use the following context to answer the question about Moses clearly and in detail.
Instructions:
- Avoid starting every response the same way; vary or skip the introduction unless it adds value.
- Keep answers concise and to the point.
- Use bullet points for lists.
- If the question is vague, ask for clarification.
- If the answer is short but the context allows, expand with relevant details.
- If unrelated or unanswerable from context, say:
"{fallback_response}"
- Give a short follow-up only when it is truly relevant.
Context:
{context}
Question:
{question}
Answer:
"""
self.GREETINGS_TRIGGERS = {
"hi",
"hello",
"hey",
"greetings",
"good morning",
"good afternoon",
"good evening",
"hi?",
"hello?",
"hey?",
"greetings?",
"good morning?",
"good afternoon?",
"good evening?",
}
self.GREETINGS = [
"Hi there! I'm Moses's brainy sidekick. Feel free to ask about his work, skills, projects, or even a bit about his personal life!",
"Hey! I'm here to help you discover Moses's skills, projects, and professional journey.",
"Hello! I can answer questions about Moses's work, experience, and what he's been up to. What would you like to know?",
"Hi! 👋 I'm like Siri, but for Moses 😄 Wanna know what he's good at or what he's been working on? Let's chat! 💬🔍",
"Greetings, human! 👽 I'm Moses's digital buddy. Ask me anything—skills, projects, secret talents... okay, maybe not too secret 🤫🚀",
"Sup! 😎 I'm the all-knowing assistant of Moses. Got questions about his work, skills, projects, or even fun facts about him? Ask about what he does, what he's built, or what makes him awesome.",
]
self.FALLBACK_RESPONSES = [
"Hmm, I don't have enough info to answer that right now. But feel free to ask about Moses's skills, projects, or professional experience!",
"That one's a bit outside my data zone! 😅 Try asking about Moses's work, what he's good at, or cool stuff he's built.",
"Oops! That question flew over my circuits 🤖💨. But hey, I can tell you all about Moses's projects, skills, or career highlights!",
"I couldn't find anything on that—yet! Let's try something else like Moses's background, his latest work, or what he's great at.",
"Either I need a software upgrade or that question's too mysterious 😜. Ask me about Moses's projects, skills, or even a fun fact!",
]
self.BLACKLIST = [
# SQL Injection keywords
"SELECT",
"DROP",
"INSERT",
"UPDATE",
"DELETE",
"ALTER",
"TRUNCATE",
"REPLACE",
"EXEC",
"EXECUTE",
"UNION",
"ALL",
"CREATE",
"GRANT",
"REVOKE",
"MERGE",
"--",
";",
"/*",
"*/",
"@@",
"@",
"CHAR(",
"NCHAR(",
"VARCHAR(",
"NVARCHAR(",
# XSS payload markers
"<script>",
"</script>",
"<img",
"onerror=",
"onload=",
"onclick=",
"onmouseover=",
"javascript:",
"vbscript:",
"data:text/html",
"<iframe",
"</iframe>",
"<object",
"<embed",
# Command injection patterns
"|",
"&",
"&&",
"||",
"$(",
"`",
"$(whoami)",
"$(ls)",
"$(cat",
"$(echo",
# Path traversal
"../",
"..\\",
"%2e%2e/",
"%2e%2e\\",
"%2e%2e%2f",
"%2e%2e%5c",
# Other suspicious patterns
"sleep(",
"benchmark(",
"load_file(",
"outfile",
"dumpfile",
]
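
    def _looks_malicious(self, text: str) -> bool:
        """Hedged sketch, not part of the original code path: BLACKLIST above
        is never referenced elsewhere in this file, so this helper shows one
        plausible way to apply it (e.g. from ask() before retrieval). Bare
        substring checks would flag benign prose, since entries like "ALL",
        "&", and "|" occur in ordinary questions; alphabetic terms are
        therefore matched on word boundaries, while symbolic payloads such as
        "../" or "<script>" are matched as raw substrings. The list itself
        would still need tuning before being enforced."""
        import re  # local import keeps the illustrative helper self-contained

        lowered = text.lower()
        for term in self.BLACKLIST:
            t = term.lower()
            if t.isalpha():
                # whole-word match for SQL-style keywords (SELECT, DROP, ...)
                if re.search(rf"\b{re.escape(t)}\b", lowered):
                    return True
            elif t in lowered:
                # raw substring match for operators and markup payloads
                return True
        return False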
def load_json_data(self, path):
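        """Load data.json into LangChain Documents.

        Expects a top-level JSON object with optional "qa" (question/answer
        pairs) and "chunks" (pre-split text) arrays; returns [] on any error.
        """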
try:
with open(path, "r", encoding="utf-8") as f:
data = json.load(f)
documents = []
if "qa" in data:
for item in data["qa"]:
text = f"Q: {item['question']}\nA: {item['answer']}"
documents.append(
Document(
page_content=text,
metadata={
"id": item.get("id", str(uuid4())),
"category": item.get("category", "QA"),
},
)
)
if "chunks" in data:
for item in data["chunks"]:
documents.append(
Document(
page_content=item["chunk"],
metadata={
"id": item.get("id", str(uuid4())),
"category": "Chunk",
},
)
)
return documents
except Exception as e:
print(f"Error loading JSON data: {e}")
return []
def init_vector_store(self, documents):
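        """Build a fresh Chroma collection over `documents` using local embeddings."""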
# CHANGED: Replaced online HuggingFaceEndpointEmbeddings with local HuggingFaceEmbeddings
# This downloads and stores the embedding model locally, eliminating API dependency
embeddings_model = HuggingFaceEmbeddings(
model_name="sentence-transformers/all-MiniLM-L6-v2",
model_kwargs={'device': 'cpu'}, # Force CPU usage to avoid GPU conflicts
encode_kwargs={'normalize_embeddings': True} # Normalize embeddings for better similarity search
)
        # Rebuild from scratch each start: wiping CHROMA_PATH avoids duplicate
        # inserts at the cost of re-embedding every document on startup
if os.path.exists(CHROMA_PATH):
shutil.rmtree(CHROMA_PATH)
uuids = [str(uuid4()) for _ in documents]
vector_store = Chroma(
collection_name="user_data",
embedding_function=embeddings_model,
persist_directory=CHROMA_PATH,
)
# CHANGED: This now processes embeddings locally instead of making API calls
vector_store.add_documents(documents=documents, ids=uuids)
return vector_store
def handle_unknown_query(self):
return random.choice(self.FALLBACK_RESPONSES)
def get_next_questions(self):
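        # Suggest three random follow-up prompts (assumes OPTIMIZED_QUESTIONS has at least three entries)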
return random.sample(OPTIMIZED_QUESTIONS, 3)
# ---------------MAIN-----------------
def ask(self, raw_query: str) -> str:
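        """Answer `raw_query` via RAG over the Chroma store.

        Empty input and greetings are handled up front; otherwise context is
        retrieved, the prompt is built, and several Groq models are tried in turn.
        """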
        q = (raw_query or "").strip()
        if not q:
            return random.choice(self.FALLBACK_RESPONSES)
if q.lower() in self.GREETINGS_TRIGGERS:
return random.choice(self.GREETINGS)
try:
docs = self.retriever.invoke(q)
except Exception as e:
return f"Error retrieving documents: {e}"
if not docs:
return random.choice(self.FALLBACK_RESPONSES)
context = "\n".join([d.page_content for d in docs])
fallback = self.handle_unknown_query()
prompt = self.PROMPT_TEMPLATE.format(
context=context, question=q, fallback_response=fallback
)
        messages = [
            {"role": "system", "content": self.SYSTEM_MESSAGE},
            {"role": "user", "content": prompt},
        ]
        # Try several models in random order; fall back to the next one on
        # rate limits or empty responses
models_to_try = [
"compound-beta-mini",
"llama-3.1-8b-instant",
"gemma2-9b-it",
]
random.shuffle(models_to_try)
for model in models_to_try:
try:
completion = self.client.chat.completions.create(
model=model,
messages=messages,
temperature=TEMPERATURE,
max_completion_tokens=MAX_TOKENS,
top_p=TOP_P,
stream=False,
)
response = completion.choices[0].message.content
if response and response.strip():
return response.strip()
else:
continue # Try next model
except Exception as e:
# Check if it's a rate limit error
if "rate_limit_exceeded" in str(e) or "429" in str(e):
print(f"Rate limit hit for model {model}, trying fallback...")
continue
else:
# For other errors, return immediately
return f"Error while calling LLM: {e}"
# If all models fail
return "I'm temporarily experiencing high demand. Please try again in a few minutes or rephrase your question."