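"""Groq-backed RAG assistant for Moses's portfolio.

Loads Q&A pairs and text chunks from a local JSON file, indexes them in a
Chroma vector store using sentence-transformer embeddings, and answers
visitor questions through Groq chat models with MMR retrieval and
rate-limit-aware model fallback.
"""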
import json
import os
import random
import shutil
from uuid import uuid4

from dotenv import load_dotenv
from groq import Groq
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings

from optimized_quiz import OPTIMIZED_QUESTIONS

load_dotenv()
|
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
DATA_PATH = "data.json"
CHROMA_PATH = "chroma_db"

# Generation and retrieval settings, all overridable via environment variables.
TEMPERATURE = float(os.getenv("G_TEMPERATURE", "0.7"))
MAX_TOKENS = int(os.getenv("G_MAX_TOKENS", "400"))
RETRIEVE_K = int(os.getenv("G_RETRIEVE_K", "3"))
TOP_P = float(os.getenv("G_TOP_P", "1.0"))
MAX_CONVERSATION_HISTORY = int(os.getenv("G_MAX_CONVERSATION_HISTORY", "5"))  # currently unused in this module
SEARCH_TYPE = os.getenv("MMR", "mmr")  # retriever search type; env key kept as "MMR" for compatibility
G_FETCH_K = int(os.getenv("G_FETCH_K", "20"))
LAMBDA_MULT = float(os.getenv("LAMBDA_MULT", "0.5"))

|
class GroqClient:
    """RAG client: loads data, builds a Chroma index, and queries Groq models."""

    def __init__(self):
        # Fail fast on a missing API key before doing any expensive indexing.
        if not GROQ_API_KEY:
            raise RuntimeError("GROQ_API_KEY not found in environment")

        self.documents = self.load_json_data(DATA_PATH)
        if not self.documents:
            raise RuntimeError("No data loaded")

        self.vector_store = self.init_vector_store(self.documents)

        # MMR (maximal marginal relevance) retrieval: fetch G_FETCH_K candidates,
        # then keep RETRIEVE_K results, trading off relevance against diversity
        # via LAMBDA_MULT.
        self.retriever = self.vector_store.as_retriever(
            search_type=SEARCH_TYPE,
            search_kwargs={
                "k": RETRIEVE_K,
                "fetch_k": G_FETCH_K,
                "lambda_mult": LAMBDA_MULT,
            },
        )

        self.client = Groq(api_key=GROQ_API_KEY)
|
        self.SYSTEM_MESSAGE = (
            "You are Moses's AI assistant, helpful, knowledgeable, professional, and friendly. "
            "Use only the provided knowledge to answer questions about Moses's background, skills, projects, and experiences. "
            "If knowledge is limited, give the most relevant answer possible without making things up. "
            "Avoid repetitive openings such as 'I'm happy to...' or 'Sure, I'd be glad to...'. "
            "Begin responses naturally, varying the first sentence.\n"
            "IMPORTANT VOICE GUIDELINES:\n"
            "Always use first person: 'I developed...', 'My experience includes...', 'I'm skilled in...'\n"
            "Only use third person if someone explicitly asks 'Tell me about Moses as a person' or similar formal introductions\n"
            "Speak as if you're having a direct conversation with the visitor\n"
            "Be personable and authentic while staying professional\n"
            "If a response is too brief, expand it contextually while keeping it accurate."
        )
|
        self.PROMPT_TEMPLATE = """
Use the following context to answer the question about Moses clearly and in detail.

Instructions:
- Avoid starting every response the same way; vary or skip the introduction unless it adds value.
- Keep answers concise and to the point.
- Use bullet points for lists.
- If the question is vague, ask for clarification.
- If the answer is short but the context allows, expand with relevant details.
- If unrelated or unanswerable from context, say:
  "{fallback_response}"
- Give a short follow-up only when it is truly relevant.

Context:
{context}

Question:
{question}

Answer:
"""
|
        self.GREETINGS_TRIGGERS = {
            "hi",
            "hello",
            "hey",
            "greetings",
            "good morning",
            "good afternoon",
            "good evening",
            "hi?",
            "hello?",
            "hey?",
            "greetings?",
            "good morning?",
            "good afternoon?",
            "good evening?",
        }
|
        self.GREETINGS = [
            "Hi there! I'm Moses's brainy sidekick. Feel free to ask about his work, skills, projects, or even a bit about his personal life!",
            "Hey! I'm here to help you discover Moses's skills, projects, and professional journey.",
            "Hello! I can answer questions about Moses's work, experience, and what he's been up to. What would you like to know?",
            "Hi! 👋 I'm like Siri, but for Moses 😄 Wanna know what he's good at or what he's been working on? Let's chat! 💬🔍",
            "Greetings, human! 👽 I'm Moses's digital buddy. Ask me anything—skills, projects, secret talents... okay, maybe not too secret 🤫🚀",
            "Sup! 😎 I'm the all-knowing assistant of Moses. Got questions about his work, skills, projects, or even fun facts about him? Ask about what he does, what he's built, or what makes him awesome.",
        ]
|
        self.FALLBACK_RESPONSES = [
            "Hmm, I don't have enough info to answer that right now. But feel free to ask about Moses's skills, projects, or professional experience!",
            "That one's a bit outside my data zone! 😅 Try asking about Moses's work, what he's good at, or cool stuff he's built.",
            "Oops! That question flew over my circuits 🤖💨. But hey, I can tell you all about Moses's projects, skills, or career highlights!",
            "I couldn't find anything on that—yet! Let's try something else like Moses's background, his latest work, or what he's great at.",
            "Either I need a software upgrade or that question's too mysterious 😜. Ask me about Moses's projects, skills, or even a fun fact!",
        ]
|
        # Crude deny-list of injection patterns used to screen incoming queries.
        self.BLACKLIST = [
            # SQL injection
            "SELECT",
            "DROP",
            "INSERT",
            "UPDATE",
            "DELETE",
            "ALTER",
            "TRUNCATE",
            "REPLACE",
            "EXEC",
            "EXECUTE",
            "UNION",
            "ALL",
            "CREATE",
            "GRANT",
            "REVOKE",
            "MERGE",
            "--",
            ";",
            "/*",
            "*/",
            "@@",
            "@",
            "CHAR(",
            "NCHAR(",
            "VARCHAR(",
            "NVARCHAR(",
            # XSS
            "<script>",
            "</script>",
            "<img",
            "onerror=",
            "onload=",
            "onclick=",
            "onmouseover=",
            "javascript:",
            "vbscript:",
            "data:text/html",
            "<iframe",
            "</iframe>",
            "<object",
            "<embed",
            # Shell injection
            "|",
            "&",
            "&&",
            "||",
            "$(",
            "`",
            "$(whoami)",
            "$(ls)",
            "$(cat",
            "$(echo",
            # Path traversal
            "../",
            "..\\",
            "%2e%2e/",
            "%2e%2e\\",
            "%2e%2e%2f",
            "%2e%2e%5c",
            # SQL timing / file access
            "sleep(",
            "benchmark(",
            "load_file(",
            "outfile",
            "dumpfile",
        ]
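
    def _is_blacklisted(self, text: str) -> bool:
        """Return True if the query contains a deny-listed pattern.

        Illustrative addition: BLACKLIST above is never consulted anywhere
        else in this module, so this helper sketches one plausible use.
        Plain substring matching is deliberately coarse; broad tokens such
        as "@" or "&" will also flag harmless text like email addresses.
        """
        lowered = text.lower()
        return any(token.lower() in lowered for token in self.BLACKLIST)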
|
    def load_json_data(self, path):
        """Load Q&A pairs and free-text chunks from the JSON file as Documents."""
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)

            documents = []

            # Each Q&A pair is embedded as a single "Q: ...\nA: ..." block.
            if "qa" in data:
                for item in data["qa"]:
                    text = f"Q: {item['question']}\nA: {item['answer']}"
                    documents.append(
                        Document(
                            page_content=text,
                            metadata={
                                "id": item.get("id", str(uuid4())),
                                "category": item.get("category", "QA"),
                            },
                        )
                    )

            # Free-text chunks are embedded verbatim.
            if "chunks" in data:
                for item in data["chunks"]:
                    documents.append(
                        Document(
                            page_content=item["chunk"],
                            metadata={
                                "id": item.get("id", str(uuid4())),
                                "category": "Chunk",
                            },
                        )
                    )

            return documents

        except Exception as e:
            print(f"Error loading JSON data: {e}")
            return []
|
    def init_vector_store(self, documents):
        """Build a fresh Chroma collection from the documents."""
        embeddings_model = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True},
        )

        # Rebuild the index from scratch on every startup so it always
        # reflects the current contents of data.json.
        if os.path.exists(CHROMA_PATH):
            shutil.rmtree(CHROMA_PATH)

        uuids = [str(uuid4()) for _ in documents]

        vector_store = Chroma(
            collection_name="user_data",
            embedding_function=embeddings_model,
            persist_directory=CHROMA_PATH,
        )
        vector_store.add_documents(documents=documents, ids=uuids)
        return vector_store
|
    def handle_unknown_query(self):
        """Return a random fallback response for unanswerable queries."""
        return random.choice(self.FALLBACK_RESPONSES)

    def get_next_questions(self):
        """Return three randomly chosen suggested follow-up questions."""
        return random.sample(OPTIMIZED_QUESTIONS, 3)
|
    def ask(self, raw_query: str) -> str:
        """Answer a visitor query via RAG over the Chroma index and a Groq model."""
        q = (raw_query or "").strip()
        if not q:
            return random.choice(self.FALLBACK_RESPONSES)

        if q.lower() in self.GREETINGS_TRIGGERS:
            return random.choice(self.GREETINGS)

        # Screen the query against the deny-list (assumed wiring: see the
        # _is_blacklisted sketch above).
        if self._is_blacklisted(q):
            return random.choice(self.FALLBACK_RESPONSES)

        try:
            docs = self.retriever.invoke(q)
        except Exception as e:
            return f"Error retrieving documents: {e}"

        if not docs:
            return random.choice(self.FALLBACK_RESPONSES)

        context = "\n".join(d.page_content for d in docs)
        fallback = self.handle_unknown_query()
        prompt = self.PROMPT_TEMPLATE.format(
            context=context, question=q, fallback_response=fallback
        )

        messages = [
            {"role": "system", "content": self.SYSTEM_MESSAGE},
            {"role": "user", "content": prompt},
        ]

        # Try models in random order; on rate limits, fall through to the next.
        models_to_try = [
            "compound-beta-mini",
            "llama-3.1-8b-instant",
            "gemma2-9b-it",
        ]
        random.shuffle(models_to_try)

        for model in models_to_try:
            try:
                completion = self.client.chat.completions.create(
                    model=model,
                    messages=messages,
                    temperature=TEMPERATURE,
                    max_completion_tokens=MAX_TOKENS,
                    top_p=TOP_P,
                    stream=False,
                )
                response = completion.choices[0].message.content
                if response and response.strip():
                    return response.strip()
                # Empty completion: try the next model.
            except Exception as e:
                if "rate_limit_exceeded" in str(e) or "429" in str(e):
                    print(f"Rate limit hit for model {model}, trying fallback...")
                    continue
                return f"Error while calling LLM: {e}"

        return "I'm temporarily experiencing high demand. Please try again in a few minutes or rephrase your question."