# clients/groq_client.py
import os
import json
from uuid import uuid4
from groq import Groq
from langchain_core.documents import Document
# CHANGED: Replaced HuggingFaceEndpointEmbeddings with HuggingFaceEmbeddings for local inference
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from dotenv import load_dotenv
import random
import shutil
from optimized_quiz import OPTIMIZED_QUESTIONS
load_dotenv() # load .env variables from root
# Config
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
DATA_PATH = "data.json" # relative to root, so this works if run from root
CHROMA_PATH = "chroma_db"
TEMPERATURE = float(os.getenv("G_TEMPERATURE", 0.7))
MAX_TOKENS = int(os.getenv("G_MAX_TOKENS", 400))
RETRIEVE_K = int(os.getenv("G_RETRIEVE_K", 3))
TOP_P = float(os.getenv("G_TOP_P", 1.0))
MAX_CONVERSATION_HISTORY = int(os.getenv("G_MAX_CONVERSATION_HISTORY", 5))
MMR = str(os.getenv("MMR", "mmr"))
G_FETCH_K = int(os.getenv("G_FETCH_K", 20))
LAMBDA_MULT = float(os.getenv("LAMBDA_MULT", 0.5))
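# Example .env entries that map to the settings above (illustrative values only,
# mirroring the defaults used here; the actual API key must be supplied):
#   GROQ_API_KEY=...
#   G_TEMPERATURE=0.7
#   G_MAX_TOKENS=400
#   G_RETRIEVE_K=3
#   G_TOP_P=1.0
#   G_MAX_CONVERSATION_HISTORY=5
#   MMR=mmr
#   G_FETCH_K=20
#   LAMBDA_MULT=0.5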


class GroqClient:
    """RAG assistant: retrieves context from a local Chroma store and answers via the Groq chat API."""

    def __init__(self):
        self.documents = self.load_json_data(DATA_PATH)
        if not self.documents:
            raise RuntimeError("No data loaded")
        self.vector_store = self.init_vector_store(self.documents)
        self.retriever = self.vector_store.as_retriever(
            search_type=MMR,  # Use Maximal Marginal Relevance
            search_kwargs={
                "k": RETRIEVE_K,  # Final number of docs to return
                "fetch_k": G_FETCH_K,  # Number of docs to initially fetch before filtering for diversity
                "lambda_mult": LAMBDA_MULT,  # Balance between relevance (1.0) and diversity (0.0)
            },
        )
        if not GROQ_API_KEY:
            raise RuntimeError("GROQ_API_KEY not found in environment")
        self.client = Groq(api_key=GROQ_API_KEY)
        self.SYSTEM_MESSAGE = (
            "You are Moses's AI assistant, helpful, knowledgeable, professional, and friendly. "
            "Use only the provided knowledge to answer questions about Moses's background, skills, projects, and experiences. "
            "If knowledge is limited, give the most relevant answer possible without making things up. "
            "Avoid repetitive openings such as 'I'm happy to...' or 'Sure, I'd be glad to...'. "
            "Begin responses naturally, varying the first sentence. "
            "Use third person only when the question explicitly asks about Moses as a person.\n"
            "IMPORTANT VOICE GUIDELINES:\n"
            "Always use first person: 'I developed...', 'My experience includes...', 'I'm skilled in...'\n"
            "Only use third person if someone explicitly asks 'Tell me about Moses as a person' or similar formal introductions\n"
            "Speak as if you're having a direct conversation with the visitor\n"
            "Be personable and authentic while staying professional\n"
            "If a response is too brief, expand it contextually while keeping it accurate."
        )
        self.PROMPT_TEMPLATE = """
Use the following context to answer the question about Moses clearly and in detail.
Instructions:
- Avoid starting every response the same way; vary or skip the introduction unless it adds value.
- Keep answers concise and to the point.
- Use bullet points for lists.
- If the question is vague, ask for clarification.
- If the answer is short but the context allows, expand with relevant details.
- If unrelated or unanswerable from context, say:
  "{fallback_response}"
- Give a short follow-up only when it is truly relevant.
Context:
{context}
Question:
{question}
Answer:
"""
        self.GREETINGS_TRIGGERS = {
            "hi", "hello", "hey", "greetings",
            "good morning", "good afternoon", "good evening",
            "hi?", "hello?", "hey?", "greetings?",
            "good morning?", "good afternoon?", "good evening?",
        }
        self.GREETINGS = [
            "Hi there! I'm Moses's brainy sidekick. Feel free to ask about his work, skills, projects, or even a bit about his personal life!",
            "Hey! I'm here to help you discover Moses's skills, projects, and professional journey.",
            "Hello! I can answer questions about Moses's work, experience, and what he's been up to. What would you like to know?",
            "Hi! 👋 I'm like Siri, but for Moses 😄 Wanna know what he's good at or what he's been working on? Let's chat! 💬🔍",
            "Greetings, human! 👽 I'm Moses's digital buddy. Ask me anything—skills, projects, secret talents... okay, maybe not too secret 🤫🚀",
            "Sup! 😎 I'm the all-knowing assistant of Moses. Got questions about his work, skills, projects, or even fun facts about him? Ask about what he does, what he's built, or what makes him awesome.",
        ]
        self.FALLBACK_RESPONSES = [
            "Hmm, I don't have enough info to answer that right now. But feel free to ask about Moses's skills, projects, or professional experience!",
            "That one's a bit outside my data zone! 😅 Try asking about Moses's work, what he's good at, or cool stuff he's built.",
            "Oops! That question flew over my circuits 🤖💨. But hey, I can tell you all about Moses's projects, skills, or career highlights!",
            "I couldn't find anything on that—yet! Let's try something else like Moses's background, his latest work, or what he's great at.",
            "Either I need a software upgrade or that question's too mysterious 😜. Ask me about Moses's projects, skills, or even a fun fact!",
        ]
        self.BLACKLIST = [
            # SQL injection keywords
            "SELECT", "DROP", "INSERT", "UPDATE", "DELETE", "ALTER",
            "TRUNCATE", "REPLACE", "EXEC", "EXECUTE", "UNION", "ALL",
            "CREATE", "GRANT", "REVOKE", "MERGE",
            "--", ";", "/*", "*/", "@@", "@",
            "CHAR(", "NCHAR(", "VARCHAR(", "NVARCHAR(",
            # XSS payload markers
            "<script>", "</script>", "<img", "onerror=", "onload=",
            "onclick=", "onmouseover=", "javascript:", "vbscript:",
            "data:text/html", "<iframe", "</iframe>", "<object", "<embed",
            # Command injection patterns
            "|", "&", "&&", "||", "$(", "`",
            "$(whoami)", "$(ls)", "$(cat", "$(echo",
            # Path traversal
            "../", "..\\", "%2e%2e/", "%2e%2e\\", "%2e%2e%2f", "%2e%2e%5c",
            # Other suspicious patterns
            "sleep(", "benchmark(", "load_file(", "outfile", "dumpfile",
        ]

    def load_json_data(self, path):
        """Load Q&A pairs and free-text chunks from a JSON file into LangChain Documents."""
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
            documents = []
            if "qa" in data:
                for item in data["qa"]:
                    text = f"Q: {item['question']}\nA: {item['answer']}"
                    documents.append(
                        Document(
                            page_content=text,
                            metadata={
                                "id": item.get("id", str(uuid4())),
                                "category": item.get("category", "QA"),
                            },
                        )
                    )
            if "chunks" in data:
                for item in data["chunks"]:
                    documents.append(
                        Document(
                            page_content=item["chunk"],
                            metadata={
                                "id": item.get("id", str(uuid4())),
                                "category": "Chunk",
                            },
                        )
                    )
            return documents
        except Exception as e:
            print(f"Error loading JSON data: {e}")
            return []
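
    # Sketch of the data.json layout that load_json_data expects (field values
    # here are placeholders, not actual repo content):
    # {
    #     "qa": [{"id": "qa-1", "category": "Projects", "question": "...", "answer": "..."}],
    #     "chunks": [{"id": "chunk-1", "chunk": "..."}]
    # }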

    def init_vector_store(self, documents):
        """Build a fresh Chroma collection from the documents using local embeddings."""
        # CHANGED: Replaced online HuggingFaceEndpointEmbeddings with local HuggingFaceEmbeddings.
        # The embedding model is downloaded and cached locally, eliminating the API dependency.
        embeddings_model = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},  # Force CPU usage to avoid GPU conflicts
            encode_kwargs={"normalize_embeddings": True},  # Normalize embeddings for better similarity search
        )
        # Clear old data to avoid duplicates
        if os.path.exists(CHROMA_PATH):
            shutil.rmtree(CHROMA_PATH)
        uuids = [str(uuid4()) for _ in documents]
        vector_store = Chroma(
            collection_name="user_data",
            embedding_function=embeddings_model,
            persist_directory=CHROMA_PATH,
        )
        # CHANGED: Embeddings are now computed locally instead of via API calls.
        vector_store.add_documents(documents=documents, ids=uuids)
        return vector_store

    def handle_unknown_query(self):
        return random.choice(self.FALLBACK_RESPONSES)

    def get_next_questions(self):
        return random.sample(OPTIMIZED_QUESTIONS, 3)

    # ---------------MAIN-----------------
    def ask(self, raw_query: str) -> str:
        q = raw_query
        if not q:
            return random.choice(self.FALLBACK_RESPONSES)
        if q.lower() in self.GREETINGS_TRIGGERS:
            return random.choice(self.GREETINGS)
        try:
            docs = self.retriever.invoke(q)
        except Exception as e:
            return f"Error retrieving documents: {e}"
        if not docs:
            return random.choice(self.FALLBACK_RESPONSES)
        context = "\n".join([d.page_content for d in docs])
        fallback = self.handle_unknown_query()
        prompt = self.PROMPT_TEMPLATE.format(
            context=context, question=q, fallback_response=fallback
        )
        messages = [
            {"role": "system", "content": self.SYSTEM_MESSAGE},
            {"role": "user", "content": prompt},
        ]
        # Try multiple models, falling back if one fails or is rate limited
        models_to_try = [
            "compound-beta-mini",
            "llama-3.1-8b-instant",
            "gemma2-9b-it",
        ]
        random.shuffle(models_to_try)
        for model in models_to_try:
            try:
                completion = self.client.chat.completions.create(
                    model=model,
                    messages=messages,
                    temperature=TEMPERATURE,
                    max_completion_tokens=MAX_TOKENS,
                    top_p=TOP_P,
                    stream=False,
                )
                response = completion.choices[0].message.content
                if response and response.strip():
                    return response.strip()
                continue  # Empty response; try the next model
            except Exception as e:
                # Retry on rate limiting; surface any other error immediately
                if "rate_limit_exceeded" in str(e) or "429" in str(e):
                    print(f"Rate limit hit for model {model}, trying fallback...")
                    continue
                return f"Error while calling LLM: {e}"
        # If all models fail
        return "I'm temporarily experiencing high demand. Please try again in a few minutes or rephrase your question."