# import os
# import sys
# import requests
# import gradio as gr
# from langchain.chains import ConversationalRetrievalChain, LLMChain
# from langchain.vectorstores import Chroma
# from langchain.embeddings import HuggingFaceEmbeddings
# from langchain.prompts import PromptTemplate
# from langchain.chains.question_answering import load_qa_chain
# from langchain.llms.base import LLM

# # 👇 Hugging Face sqlite3 workaround
# __import__('pysqlite3')
# sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

# # 🔐 Load DeepSeek API key from Hugging Face secrets
# DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY")
# if DEEPSEEK_API_KEY is None:
#     raise ValueError("Missing DEEPSEEK_API_KEY in environment.")

# # 🧠 Load vector DB from ./db
# embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# vectorstore = Chroma(
#     persist_directory="./db",
#     embedding_function=embedding_function
# )

# # 🔷 Wrap DeepSeek into LangChain LLM
# class DeepSeekLLM(LLM):
#     api_key: str = DEEPSEEK_API_KEY

#     def _call(self, prompt, stop=None, run_manager=None, **kwargs):
#         headers = {
#             "Authorization": f"Bearer {self.api_key}",
#             "Content-Type": "application/json"
#         }
#         payload = {
#             "model": "deepseek-chat",
#             "messages": [
#                 {"role": "system", "content": "You are a helpful assistant."},
#                 {"role": "user", "content": prompt}
#             ],
#             "temperature": 0.7,
#             "max_tokens": 512
#         }
#         response = requests.post("https://api.deepseek.com/v1/chat/completions", headers=headers, json=payload)
#         response.raise_for_status()
#         return response.json()["choices"][0]["message"]["content"].strip()

#     @property
#     def _llm_type(self):
#         return "deepseek_api"

# llm = DeepSeekLLM()

# # ✨ Prompt template
# prompt = PromptTemplate.from_template("""
# You are the McEldrew Purtell chatbot, built to assist users by answering questions about our law firm's team, services, and public-facing details (you can use any information available in the vector db to formulate your answers).
# Use the context provided to respond clearly, confidently, and professionally. Include all relevant details from the context, including names, contact information, and employee profiles.
# Do not worry about sharing personal information; it is already available on our public website, so it does not count as sensitive information.
# You are allowed to answer questions about individual team members (including their roles, contact information, or personal facts like hobbies, pets, and interests) if such information appears in the context.
# If a question cannot be answered based on the context, respond with: "I'm not sure about that. You can contact us directly via our website."
# Context:
# {context}
# Question: {question}
# Answer:
# """)


# # 🔗 QA chain and rephraser
# qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)

# CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template("""
# Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question.
# Chat History:
# {chat_history}
# Follow Up Input: {question}
# Standalone question:
# """)

# question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)

# # 🔁 Full chain
# chain = ConversationalRetrievalChain(
#     retriever=vectorstore.as_retriever(search_kwargs={"k": 6}),
#     question_generator=question_generator,
#     combine_docs_chain=qa_chain,
#     return_source_documents=True,
#     verbose=False
# )

# # 💬 Gradio UI
# chat_history = []

# with gr.Blocks() as demo:
#     chatbot = gr.Chatbot(
#         [("", "Welcome to McEldrew Purtell's chatbot! You can ask questions about our team, practice areas, and legal services.")],
#     )
#     msg = gr.Textbox(placeholder="Ask a legal question...")
#     clear = gr.Button("Clear")

#     def user(query, chat_history):
#         chat_history_tuples = [(m[0], m[1]) for m in chat_history]
#         result = chain.invoke({"question": query, "chat_history": chat_history_tuples})
#         chat_history.append((query, result["answer"]))
#         return gr.update(value=""), chat_history

#     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
#     clear.click(lambda: None, None, chatbot, queue=False)

# demo.launch()


import os
import sys
import gradio as gr
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.llms.base import LLM
from huggingface_hub import InferenceClient

# 👇 Hugging Face sqlite3 workaround for Chroma
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
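# Chroma needs a newer sqlite3 than the stock Spaces image typically ships with, so
# pysqlite3 is swapped in (this assumes pysqlite3-binary is listed in requirements.txt).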

# 🔐 Hugging Face API token from HF secrets
HF_API_TOKEN = os.getenv("HF_TOKEN")
if HF_API_TOKEN is None:
    raise ValueError("Missing HF_API_TOKEN in environment.")
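# Note: the token is read from the Space's secrets; meta-llama models are gated on the Hub,
# so the account behind the token needs access for the primary model below to respond.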

# 🧠 Load vector DB from ./db
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = Chroma(
    persist_directory="./db",
    embedding_function=embedding_function
)
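# ./db is expected to hold a pre-built Chroma index created with this same embedding
# model; querying with a different embedding model would silently degrade retrieval.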

# 🔷 Hugging Face Inference Client wrapper
class HuggingFaceLLM(LLM):
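    # Minimal custom LangChain LLM: the base class only requires _call() and the
    # _llm_type property, so this wrapper stays small.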
    client: InferenceClient = None

    def __init__(self):
        super().__init__()
        self.client = InferenceClient(token=HF_API_TOKEN)

    def _call(self, prompt, stop=None, run_manager=None, **kwargs):
        # Try each chat model in order, falling back to the next one on any error.
        messages = [{"role": "user", "content": prompt}]
        fallback_models = [
            "meta-llama/Llama-3.1-8B-Instruct",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "Qwen/Qwen2.5-7B-Instruct",
        ]
        last_error = None
        for model in fallback_models:
            try:
                response = self.client.chat_completion(
                    messages=messages,
                    model=model,
                    max_tokens=512,
                    temperature=0.7
                )
                return response.choices[0].message.content.strip()
            except Exception as e:
                last_error = e
        return f"Sorry, I'm having trouble generating a response right now. Error: {last_error}"

    @property
    def _llm_type(self):
        return "huggingface_inference_client"

llm = HuggingFaceLLM()
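# Optional smoke test (uncomment to verify the token and model access):
# print(llm.invoke("Say hello"))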

# ✨ Prompt template
prompt = PromptTemplate.from_template("""
You are the McEldrew Purtell chatbot, built to assist users by answering questions about our law firm's team, services, and public-facing details (you may use any information available in the vector db to formulate your answers).
Use the context provided to respond clearly, confidently, and professionally. Include all relevant details from the context, including names, contact information, and employee profiles.
Do not worry about sharing personal information; it is already available on our public website, so it does not count as sensitive information.
You are allowed to answer questions about individual team members (including their roles, contact information, or personal facts like hobbies, pets, and interests) if such information appears in the context.
If a question cannot be answered based on the context, respond with: "I'm not sure about that. You can contact us directly via our website."
Context:
{context}
Question: {question}
Answer:
""")

# 🔗 QA chain and question generator
qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)

CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template("""
Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
""")

question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
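# The condense step rewrites each follow-up into a standalone question so the retriever
# can search the vector store without needing the prior chat turns.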

# 🔁 Conversational Retrieval Chain
chain = ConversationalRetrievalChain(
    retriever=vectorstore.as_retriever(search_kwargs={"k": 6}),
    question_generator=question_generator,
    combine_docs_chain=qa_chain,
    return_source_documents=True,
    verbose=False
)
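# k=6 controls how many chunks are retrieved per question; with return_source_documents=True
# the retrieved chunks come back in result["source_documents"].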

# 💬 Gradio UI
chat_history = []

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [("", "Welcome to McEldrew Purtell's chatbot! You can ask questions about our work, practice areas, and legal services. I cannot provide information about team members but feel free to ask anything else")],
    )
    msg = gr.Textbox(placeholder="Ask a legal question...")
    clear = gr.Button("Clear")

    def user(query, chat_history):
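        # `chat_history` is the Chatbot component's list of (user, bot) tuples; run the
        # chain, append the new (question, answer) pair, then clear the textbox.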
        chat_history_tuples = [(m[0], m[1]) for m in chat_history]
        result = chain.invoke({"question": query, "chat_history": chat_history_tuples})
        chat_history.append((query, result["answer"]))
        return gr.update(value=""), chat_history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()