import os
import sys
import gradio as gr
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.llms.base import LLM
from huggingface_hub import InferenceClient
# Hugging Face Spaces sqlite3 workaround for Chroma
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
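# Why: Chroma requires SQLite >= 3.35, which older base images may not ship.
# pysqlite3 (typically installed as pysqlite3-binary via requirements.txt) bundles
# a newer SQLite and is registered here under the stdlib module name before Chroma is used.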
# Hugging Face API token from HF secrets
HF_API_TOKEN = os.getenv("HF_TOKEN")
if HF_API_TOKEN is None:
    raise ValueError("Missing HF_TOKEN in environment.")
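# HF_TOKEN is expected to be configured as a Space secret (Settings -> Variables
# and secrets) so it shows up here as an environment variable.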
# Load vector DB from ./db
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = Chroma(
    persist_directory="./db",
    embedding_function=embedding_function
)
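# The ./db directory is assumed to be built ahead of time by a separate ingestion
# step (not part of this app). A minimal sketch, assuming the firm's site content
# has already been saved as .txt files under ./docs:
#
#   from langchain.document_loaders import DirectoryLoader
#   from langchain.text_splitter import RecursiveCharacterTextSplitter
#   docs = DirectoryLoader("./docs", glob="**/*.txt").load()
#   chunks = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_documents(docs)
#   Chroma.from_documents(chunks, embedding_function, persist_directory="./db")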
# Hugging Face Inference Client wrapper
class HuggingFaceLLM(LLM):
    client: InferenceClient = None

    def __init__(self):
        super().__init__()
        self.client = InferenceClient(token=HF_API_TOKEN)
    def _call(self, prompt, stop=None, run_manager=None, **kwargs):
        # Try each hosted chat model in order, falling back to the next on failure.
        fallback_models = [
            "meta-llama/Llama-3.1-8B-Instruct",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "Qwen/Qwen2.5-7B-Instruct",
        ]
        messages = [{"role": "user", "content": prompt}]
        last_error = None
        for model in fallback_models:
            try:
                response = self.client.chat_completion(
                    messages=messages,
                    model=model,
                    max_tokens=512,
                    temperature=0.7
                )
                return response.choices[0].message.content.strip()
            except Exception as e:
                last_error = e
        return f"Sorry, I'm having trouble generating a response right now. Error: {str(last_error)}"
    @property
    def _llm_type(self):
        return "huggingface_inference_client"
llm = HuggingFaceLLM()
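# Optional sanity check (left commented out): the wrapper can be exercised on its
# own before wiring it into the chains below, e.g. `print(llm("Hello"))`.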
# Prompt template
prompt = PromptTemplate.from_template("""
You are the McEldrew Purtell chatbot, built to assist users by answering questions about our law firm's team, services, and public-facing details (you can use any information available in the vector DB to formulate your answers).
Use the context provided to respond clearly, confidently, and professionally. Include all relevant details from the context, including names, contact information, and employee profiles. Do not worry about communicating personal information;
this information is already available on the firm's website, so it does not count as sensitive information.
You are allowed to answer questions about individual team members, including their roles, contact information, or personal facts like hobbies, pets, and interests, if such information appears in the context.
If a question cannot be answered based on the context, respond with: "I'm not sure about that. You can contact us directly via our website."
Context:
{context}
Question: {question}
Answer:
""")
# QA chain and question generator
qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template("""
Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
""")
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
# Conversational Retrieval Chain
chain = ConversationalRetrievalChain(
    retriever=vectorstore.as_retriever(search_kwargs={"k": 6}),
    question_generator=question_generator,
    combine_docs_chain=qa_chain,
    return_source_documents=True,
    verbose=False
)
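# Note: ConversationalRetrievalChain.from_llm(llm, retriever, ...) would build the
# same question-generator and combine-docs sub-chains internally; the explicit form
# above is used so the custom QA prompt and condense-question prompt are wired in.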
# Gradio UI
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [("", "Welcome to McEldrew Purtell's chatbot! You can ask questions about our work, practice areas, and legal services. I cannot provide information about team members, but feel free to ask anything else.")],
    )
    msg = gr.Textbox(placeholder="Ask a legal question...")
    clear = gr.Button("Clear")

    def respond(query, chat_history):
        # The Chatbot component's value doubles as the conversation history.
        chat_history_tuples = [(m[0], m[1]) for m in chat_history]
        result = chain.invoke({"question": query, "chat_history": chat_history_tuples})
        chat_history.append((query, result["answer"]))
        return gr.update(value=""), chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()