# clients/groq_client.py
import os
import json
import random
import shutil
from uuid import uuid4

from groq import Groq
from langchain_core.documents import Document
# CHANGED: Replaced HuggingFaceEndpointEmbeddings with HuggingFaceEmbeddings for local inference
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from dotenv import load_dotenv

from optimized_quiz import OPTIMIZED_QUESTIONS

load_dotenv()  # load .env variables from root

# Config
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
DATA_PATH = "data.json"  # relative to root, so this works if run from root
CHROMA_PATH = "chroma_db"
TEMPERATURE = float(os.getenv("G_TEMPERATURE", 0.7))
MAX_TOKENS = int(os.getenv("G_MAX_TOKENS", 400))
RETRIEVE_K = int(os.getenv("G_RETRIEVE_K", 3))
TOP_P = float(os.getenv("G_TOP_P", 1.0))
MAX_CONVERSATION_HISTORY = int(os.getenv("G_MAX_CONVERSATION_HISTORY", 5))
MMR = str(os.getenv("MMR", "mmr"))
G_FETCH_K = int(os.getenv("G_FETCH_K", 20))
LAMBDA_MULT = float(os.getenv("LAMBDA_MULT", 0.5))


class GroqClient:
    def __init__(self):
        self.documents = self.load_json_data(DATA_PATH)
        if not self.documents:
            raise RuntimeError("No data loaded")

        self.vector_store = self.init_vector_store(self.documents)
        self.retriever = self.vector_store.as_retriever(
            search_type=MMR,  # Use Maximal Marginal Relevance
            search_kwargs={
                "k": RETRIEVE_K,  # Final number of docs to return
                "fetch_k": G_FETCH_K,  # Number of docs to initially fetch before filtering for diversity
                "lambda_mult": LAMBDA_MULT,  # Balance between relevance (1.0) and diversity (0.0)
            },
        )

        if not GROQ_API_KEY:
            raise RuntimeError("GROQ_API_KEY not found in environment")
        self.client = Groq(api_key=GROQ_API_KEY)

        self.SYSTEM_MESSAGE = (
            "You are Moses's AI assistant: helpful, knowledgeable, professional, and friendly. "
            "Use only the provided knowledge to answer questions about Moses's background, skills, projects, and experiences. "
            "If knowledge is limited, give the most relevant answer possible without making things up. "
            "Avoid repetitive openings such as 'I'm happy to...' or 'Sure, I'd be glad to...'. "
            "Begin responses naturally, varying the first sentence. "
            "Use third person when the question explicitly asks about Moses.\n\n"
            "IMPORTANT VOICE GUIDELINES:\n"
            "Always use first person: 'I developed...', 'My experience includes...', 'I'm skilled in...'\n"
            "Only use third person if someone explicitly asks 'Tell me about Moses as a person' or similar formal introductions\n"
            "Speak as if you're having a direct conversation with the visitor\n"
            "Be personable and authentic while staying professional\n"
            "If a response is too brief, expand it contextually while keeping it accurate."
        )

        self.PROMPT_TEMPLATE = """
Use the following context to answer the question about Moses clearly and in detail.

Instructions:
- Avoid starting every response the same way; vary or skip the introduction unless it adds value.
- Keep answers concise and to the point.
- Use bullet points for lists.
- If the question is vague, ask for clarification.
- If the answer is short but the context allows, expand with relevant details.
- If unrelated or unanswerable from context, say: "{fallback_response}"
- Give a short follow-up only when it is truly relevant.

Context:
{context}

Question:
{question}

Answer:
"""

        self.GREETINGS_TRIGGERS = {
            "hi", "hello", "hey", "greetings",
            "good morning", "good afternoon", "good evening",
            "hi?", "hello?", "hey?", "greetings?",
            "good morning?", "good afternoon?", "good evening?",
        }

        self.GREETINGS = [
            "Hi there! I'm Moses's brainy sidekick. "
            "Feel free to ask about his work, skills, projects, or even a bit about his personal life!",
            "Hey! I'm here to help you discover Moses's skills, projects, and professional journey.",
            "Hello! I can answer questions about Moses's work, experience, and what he's been up to. What would you like to know?",
            "Hi! 👋 I'm like Siri, but for Moses 😄 Wanna know what he's good at or what he's been working on? Let's chat! 💬🔍",
            "Greetings, human! 👽 I'm Moses's digital buddy. Ask me anything: skills, projects, secret talents... okay, maybe not too secret 🤫🚀",
            "Sup! 😎 I'm the all-knowing assistant of Moses. Got questions about his work, skills, projects, or even fun facts about him? Ask about what he does, what he's built, or what makes him awesome.",
        ]

        self.FALLBACK_RESPONSES = [
            "Hmm, I don't have enough info to answer that right now. But feel free to ask about Moses's skills, projects, or professional experience!",
            "That one's a bit outside my data zone! 😅 Try asking about Moses's work, what he's good at, or cool stuff he's built.",
            "Oops! That question flew over my circuits 🤖💨. But hey, I can tell you all about Moses's projects, skills, or career highlights!",
            "I couldn't find anything on that... yet! Let's try something else like Moses's background, his latest work, or what he's great at.",
            "Either I need a software upgrade or that question's too mysterious 😜. Ask me about Moses's projects, skills, or even a fun fact!",
        ]

        self.BLACKLIST = [
            # SQL Injection keywords
            "SELECT", "DROP", "INSERT", "UPDATE", "DELETE", "ALTER",
            "TRUNCATE", "REPLACE", "EXEC", "EXECUTE", "UNION", "ALL",
            "CREATE", "GRANT", "REVOKE", "MERGE",
            "--", ";", "/*", "*/", "@@", "@",
            "CHAR(", "NCHAR(", "VARCHAR(", "NVARCHAR(",
            # XSS payload markers (the original entries were stripped as HTML
            # tags; these are representative reconstructions, not the originals)
            "<script>", "</script>", "<img", "<iframe",
            "javascript:", "onerror=", "onload=",
        ]

    # NOTE: the original method name was lost; "answer" is assumed from the
    # surviving body (`q = raw_query`, `-> str:`).
    def answer(self, raw_query: str) -> str:
        q = raw_query
        if q is None or q == "":
            return random.choice(self.FALLBACK_RESPONSES)

        if q.lower() in self.GREETINGS_TRIGGERS:
            return random.choice(self.GREETINGS)

        try:
            docs = self.retriever.invoke(q)
        except Exception as e:
            return f"Error retrieving documents: {e}"

        if not docs:
            return random.choice(self.FALLBACK_RESPONSES)

        context = "\n".join([d.page_content for d in docs])
        fallback = self.handle_unknown_query()
        prompt = self.PROMPT_TEMPLATE.format(
            context=context, question=q, fallback_response=fallback
        )

        messages = [
            {"role": "system", "content": self.SYSTEM_MESSAGE},
            {"role": "user", "content": prompt},
        ]

        # Try multiple models with fallback
        # Add fallback models if compound models fail
        models_to_try = [
            "compound-beta-mini",
            "llama-3.1-8b-instant",
            "gemma2-9b-it",
        ]
        random.shuffle(models_to_try)

        for model in models_to_try:
            try:
                completion = self.client.chat.completions.create(
                    model=model,
                    messages=messages,
                    temperature=TEMPERATURE,
                    max_completion_tokens=MAX_TOKENS,
                    top_p=TOP_P,
                    stream=False,
                )
                response = completion.choices[0].message.content
                if response and response.strip():
                    return response.strip()
                else:
                    continue  # Try next model
            except Exception as e:
                # Check if it's a rate limit error
                if "rate_limit_exceeded" in str(e) or "429" in str(e):
                    print(f"Rate limit hit for model {model}, trying fallback...")
                    continue
                else:
                    # For other errors, return immediately
                    return f"Error while calling LLM: {e}"

        # If all models fail
        return "I'm temporarily experiencing high demand. Please try again in a few minutes or rephrase your question."
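
    # --- Reconstructed helpers -------------------------------------------
    # The methods below were lost during extraction, but __init__ and
    # answer() still call them, so these are minimal sketches of what they
    # plausibly did. The embedding model name, the collection name, and the
    # data.json schema (a list of objects with a "content" field) are
    # assumptions, not recovered code.

    def load_json_data(self, path: str) -> list:
        # Load data.json and wrap each entry in a LangChain Document.
        # Assumes a list of {"content": ...} objects; adjust to the real schema.
        try:
            with open(path, "r", encoding="utf-8") as f:
                raw = json.load(f)
        except (OSError, json.JSONDecodeError):
            return []
        return [
            Document(page_content=item.get("content", ""), metadata={"source": path})
            for item in raw
        ]

    def init_vector_store(self, documents: list) -> Chroma:
        # Rebuild the Chroma store from scratch on startup so stale embeddings
        # never linger (presumably why shutil is imported).
        shutil.rmtree(CHROMA_PATH, ignore_errors=True)
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"  # assumed model
        )
        store = Chroma(
            collection_name="portfolio",  # assumed collection name
            embedding_function=embeddings,
            persist_directory=CHROMA_PATH,
        )
        store.add_documents(documents=documents, ids=[str(uuid4()) for _ in documents])
        return store

    def handle_unknown_query(self) -> str:
        # Pick a fallback line to embed in the prompt template.
        return random.choice(self.FALLBACK_RESPONSES)

    def is_blacklisted(self, query: str) -> bool:
        # Hypothetical guard: BLACKLIST is defined above but its call site was
        # lost; queries were presumably screened roughly like this.
        q_upper = query.upper()
        return any(token.upper() in q_upper for token in self.BLACKLIST)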
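
# Minimal usage sketch. Assumption: this module is normally imported by a web
# backend, so the guard below is only a local smoke test.
if __name__ == "__main__":
    bot = GroqClient()
    question = "What projects has Moses built?"
    if bot.is_blacklisted(question):
        print(random.choice(bot.FALLBACK_RESPONSES))
    else:
        print(bot.answer(question))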