# clients/groq_client.py

import os
import json
from uuid import uuid4
from groq import Groq
from langchain_core.documents import Document

# CHANGED: Replaced HuggingFaceEndpointEmbeddings with HuggingFaceEmbeddings for local inference
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from dotenv import load_dotenv
import random
import shutil
from optimized_quiz import OPTIMIZED_QUESTIONS

load_dotenv()  # load .env variables from root

# Config
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
DATA_PATH = "data.json"  # relative to root, so this works if run from root
CHROMA_PATH = "chroma_db"

TEMPERATURE = float(os.getenv("G_TEMPERATURE", "0.7"))
MAX_TOKENS = int(os.getenv("G_MAX_TOKENS", "400"))
RETRIEVE_K = int(os.getenv("G_RETRIEVE_K", "3"))
TOP_P = float(os.getenv("G_TOP_P", "1.0"))
MAX_CONVERSATION_HISTORY = int(os.getenv("G_MAX_CONVERSATION_HISTORY", "5"))  # currently unused
MMR = os.getenv("MMR", "mmr")  # retriever search_type; os.getenv already returns a str
G_FETCH_K = int(os.getenv("G_FETCH_K", "20"))
LAMBDA_MULT = float(os.getenv("LAMBDA_MULT", "0.5"))
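
# Example .env overrides (illustrative values; every variable is optional):
#   G_TEMPERATURE=0.5
#   G_RETRIEVE_K=4
#   G_FETCH_K=30
#   MMR=similarity        # switch the retriever to plain similarity search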


class GroqClient:
    def __init__(self):
        # Fail fast on a missing API key before building the vector store
        if not GROQ_API_KEY:
            raise RuntimeError("GROQ_API_KEY not found in environment")
        self.client = Groq(api_key=GROQ_API_KEY)

        self.documents = self.load_json_data(DATA_PATH)
        if not self.documents:
            raise RuntimeError("No data loaded")

        self.vector_store = self.init_vector_store(self.documents)

        self.retriever = self.vector_store.as_retriever(
            search_type=MMR,  # "mmr" (Maximal Marginal Relevance) by default; overridable via the MMR env var
            search_kwargs={
                "k": RETRIEVE_K,  # Final number of docs to return
                "fetch_k": G_FETCH_K,  # Number of docs to initially fetch before filtering for diversity
                "lambda_mult": LAMBDA_MULT,  # Balance between relevance (1.0) and diversity (0.0)
            },
        )

        self.SYSTEM_MESSAGE = (
            "You are Moses's AI assistant: helpful, knowledgeable, professional, and friendly. "
            "Use only the provided knowledge to answer questions about Moses's background, skills, projects, and experiences. "
            "If knowledge is limited, give the most relevant answer possible without making things up. "
            "Avoid repetitive openings such as 'I'm happy to...' or 'Sure, I'd be glad to...'. "
            "Begin responses naturally, varying the first sentence.\n"
            "IMPORTANT VOICE GUIDELINES:\n"
            "Always use first person: 'I developed...', 'My experience includes...', 'I'm skilled in...'\n"
            "Only use third person if someone explicitly asks 'Tell me about Moses as a person' or similar formal introductions\n"
            "Speak as if you're having a direct conversation with the visitor\n"
            "Be personable and authentic while staying professional\n"
            "If a response is too brief, expand it contextually while keeping it accurate."
        )

        self.PROMPT_TEMPLATE = """
Use the following context to answer the question about Moses clearly and in detail.

Instructions:
- Avoid starting every response the same way; vary or skip the introduction unless it adds value.
- Keep answers concise and to the point.
- Use bullet points for lists.
- If the question is vague, ask for clarification.
- If the answer is short but the context allows, expand with relevant details.
- If unrelated or unanswerable from context, say:
  "{fallback_response}"
- Give a short follow-up only when it is truly relevant.

Context:
{context}

Question:
{question}

Answer:
"""

        self.GREETINGS_TRIGGERS = {
            "hi",
            "hello",
            "hey",
            "greetings",
            "good morning",
            "good afternoon",
            "good evening",
            "hi?",
            "hello?",
            "hey?",
            "greetings?",
            "good morning?",
            "good afternoon?",
            "good evening?",
        }

        self.GREETINGS = [
            "Hi there! I'm Moses's brainy sidekick. Feel free to ask about his work, skills, projects, or even a bit about his personal life!",
            "Hey! I'm here to help you discover Moses's skills, projects, and professional journey.",
            "Hello! I can answer questions about Moses's work, experience, and what he's been up to. What would you like to know?",
            "Hi! 👋 I'm like Siri, but for Moses 😄 Wanna know what he's good at or what he's been working on? Let's chat! 💬🔍",
            "Greetings, human! 👽 I'm Moses's digital buddy. Ask me anything—skills, projects, secret talents... okay, maybe not too secret 🤫🚀",
            "Sup! 😎 I'm the all-knowing assistant of Moses. Got questions about his work, skills, projects, or even fun facts about him? Ask about what he does, what he's built, or what makes him awesome.",
        ]

        self.FALLBACK_RESPONSES = [
            "Hmm, I don't have enough info to answer that right now. But feel free to ask about Moses's skills, projects, or professional experience!",
            "That one's a bit outside my data zone! 😅 Try asking about Moses's work, what he's good at, or cool stuff he's built.",
            "Oops! That question flew over my circuits 🤖💨. But hey, I can tell you all about Moses's projects, skills, or career highlights!",
            "I couldn't find anything on that—yet! Let's try something else like Moses's background, his latest work, or what he's great at.",
            "Either I need a software upgrade or that question's too mysterious 😜. Ask me about Moses's projects, skills, or even a fun fact!",
        ]

        self.BLACKLIST = [
            # SQL Injection keywords
            "SELECT",
            "DROP",
            "INSERT",
            "UPDATE",
            "DELETE",
            "ALTER",
            "TRUNCATE",
            "REPLACE",
            "EXEC",
            "EXECUTE",
            "UNION",
            "ALL",
            "CREATE",
            "GRANT",
            "REVOKE",
            "MERGE",
            "--",
            ";",
            "/*",
            "*/",
            "@@",
            "@",
            "CHAR(",
            "NCHAR(",
            "VARCHAR(",
            "NVARCHAR(",
            # XSS payload markers
            "<script>",
            "</script>",
            "<img",
            "onerror=",
            "onload=",
            "onclick=",
            "onmouseover=",
            "javascript:",
            "vbscript:",
            "data:text/html",
            "<iframe",
            "</iframe>",
            "<object",
            "<embed",
            # Command injection patterns
            "|",
            "&",
            "&&",
            "||",
            "$(",
            "`",
            "$(whoami)",
            "$(ls)",
            "$(cat",
            "$(echo",
            # Path traversal
            "../",
            "..\\",
            "%2e%2e/",
            "%2e%2e\\",
            "%2e%2e%2f",
            "%2e%2e%5c",
            # Other suspicious patterns
            "sleep(",
            "benchmark(",
            "load_file(",
            "outfile",
            "dumpfile",
        ]
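
    # NOTE (editor sketch): BLACKLIST above is never consulted anywhere in
    # this class. Below is a minimal, hypothetical helper showing one way it
    # could be wired into ask(); the method name is an assumption, not part
    # of the original code. Be aware that naive substring matching on
    # single-character tokens like "|", "&", or "@" will flag plenty of
    # harmless input, so treat this as a starting point, not a hardened filter.
    def _contains_blacklisted(self, query: str) -> bool:
        upper = query.upper()
        return any(token.upper() in upper for token in self.BLACKLIST)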

    def load_json_data(self, path):
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)

            documents = []

            if "qa" in data:
                for item in data["qa"]:
                    text = f"Q: {item['question']}\nA: {item['answer']}"
                    documents.append(
                        Document(
                            page_content=text,
                            metadata={
                                "id": item.get("id", str(uuid4())),
                                "category": item.get("category", "QA"),
                            },
                        )
                    )

            if "chunks" in data:
                for item in data["chunks"]:
                    documents.append(
                        Document(
                            page_content=item["chunk"],
                            metadata={
                                "id": item.get("id", str(uuid4())),
                                "category": "Chunk",
                            },
                        )
                    )

            return documents

        except Exception as e:
            print(f"Error loading JSON data: {e}")
            return []

    def init_vector_store(self, documents):
        # CHANGED: Replaced online HuggingFaceEndpointEmbeddings with local HuggingFaceEmbeddings
        # This downloads and stores the embedding model locally, eliminating API dependency
        embeddings_model = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},  # Force CPU usage to avoid GPU conflicts
            encode_kwargs={"normalize_embeddings": True},  # Normalize embeddings for better similarity search
        )

        # Clear old data to avoid duplicates
        if os.path.exists(CHROMA_PATH):
            shutil.rmtree(CHROMA_PATH)

        uuids = [str(uuid4()) for _ in documents]

        vector_store = Chroma(
            collection_name="user_data",
            embedding_function=embeddings_model,
            persist_directory=CHROMA_PATH,
        )

        # CHANGED: This now processes embeddings locally instead of making API calls
        vector_store.add_documents(documents=documents, ids=uuids)
        return vector_store
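
    # NOTE (editor sketch): the index is rebuilt and re-embedded on every
    # startup because the old directory is deleted above. If startup cost
    # matters, an already-persisted index could be reused when data.json is
    # unchanged between runs; a hypothetical variant (untested assumption):
    #
    #   if os.path.exists(CHROMA_PATH):
    #       return Chroma(
    #           collection_name="user_data",
    #           embedding_function=embeddings_model,
    #           persist_directory=CHROMA_PATH,
    #       )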

    def handle_unknown_query(self):
        return random.choice(self.FALLBACK_RESPONSES)

    def get_next_questions(self):
        # random.sample raises ValueError if the pool has fewer than 3 items
        return random.sample(OPTIMIZED_QUESTIONS, min(3, len(OPTIMIZED_QUESTIONS)))

    # ---------------MAIN-----------------

    def ask(self, raw_query: str) -> str:
        q = (raw_query or "").strip()
        if not q:
            return random.choice(self.FALLBACK_RESPONSES)

        if q.lower() in self.GREETINGS_TRIGGERS:
            return random.choice(self.GREETINGS)

        try:
            docs = self.retriever.invoke(q)
        except Exception as e:
            return f"Error retrieving documents: {e}"

        if not docs:
            return random.choice(self.FALLBACK_RESPONSES)

        context = "\n".join([d.page_content for d in docs])
        fallback = self.handle_unknown_query()
        prompt = self.PROMPT_TEMPLATE.format(
            context=context, question=q, fallback_response=fallback
        )

        messages = [
            {"role": "system", "content": self.SYSTEM_MESSAGE},
            {"role": "user", "content": prompt},
        ]

        # Try several models, shuffled to spread load; if one is rate
        # limited or returns an empty response, fall through to the next.
        models_to_try = [
            "compound-beta-mini",
            "llama-3.1-8b-instant",
            "gemma2-9b-it",
        ]

        random.shuffle(models_to_try)

        for model in models_to_try:
            try:
                completion = self.client.chat.completions.create(
                    model=model,
                    messages=messages,
                    temperature=TEMPERATURE,
                    max_completion_tokens=MAX_TOKENS,
                    top_p=TOP_P,
                    stream=False,
                )
                response = completion.choices[0].message.content
                if response and response.strip():
                    return response.strip()
                else:
                    continue  # Try next model

            except Exception as e:
                # Check if it's a rate limit error
                if "rate_limit_exceeded" in str(e) or "429" in str(e):
                    print(f"Rate limit hit for model {model}, trying fallback...")
                    continue
                else:
                    # For other errors, return immediately
                    return f"Error while calling LLM: {e}"

        # If all models fail
        return "I'm temporarily experiencing high demand. Please try again in a few minutes or rephrase your question."