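"""Gradio chatbot for the McEldrew Purtell law firm.

Retrieval-augmented QA over a persisted Chroma vector store (./db) built with
sentence-transformers/all-MiniLM-L6-v2 embeddings. Responses are generated by
hosted chat models on the Hugging Face Inference API (Llama 3.1 8B Instruct,
falling back to Mistral 7B and Qwen 2.5 7B), wired together with a LangChain
ConversationalRetrievalChain and served through a Gradio Blocks UI.
"""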
import os
import sys
import gradio as gr
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.llms.base import LLM
from huggingface_hub import InferenceClient

# 👇 Hugging Face sqlite3 workaround for Chroma
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

# 🔐 Hugging Face API token from HF secrets
HF_API_TOKEN = os.getenv("HF_TOKEN")
if HF_API_TOKEN is None:
    raise ValueError("Missing HF_TOKEN in environment.")

# 🧠 Load vector DB from ./db
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = Chroma(
    persist_directory="./db",
    embedding_function=embedding_function
)

# 🔷 Hugging Face Inference Client wrapper
class HuggingFaceLLM(LLM):
    client: InferenceClient = None

    def __init__(self):
        super().__init__()
        self.client = InferenceClient(token=HF_API_TOKEN)

    def _call(self, prompt, stop=None, run_manager=None, **kwargs):
        try:
            # Primary model: Llama 3.1 8B Instruct via the HF Inference API
            messages = [{"role": "user", "content": prompt}]
            response = self.client.chat_completion(
                messages=messages,
                model="meta-llama/Llama-3.1-8B-Instruct",
                max_tokens=512,
                temperature=0.7
            )
            return response.choices[0].message.content.strip()
        except Exception:
            try:
                # First fallback: Mistral 7B Instruct
                messages = [{"role": "user", "content": prompt}]
                response = self.client.chat_completion(
                    messages=messages,
                    model="mistralai/Mistral-7B-Instruct-v0.3",
                    max_tokens=512,
                    temperature=0.7
                )
                return response.choices[0].message.content.strip()
            except Exception:
                try:
                    # Final fallback: Qwen 2.5 7B Instruct
                    messages = [{"role": "user", "content": prompt}]
                    response = self.client.chat_completion(
                        messages=messages,
                        model="Qwen/Qwen2.5-7B-Instruct",
                        max_tokens=512,
                        temperature=0.7
                    )
                    return response.choices[0].message.content.strip()
                except Exception as e3:
                    return f"Sorry, I'm having trouble generating a response right now. Error: {str(e3)}"
    @property
    def _llm_type(self):
        return "huggingface_inference_client"

llm = HuggingFaceLLM()

# ✨ Prompt template
prompt = PromptTemplate.from_template("""
You are the McEldrew Purtell chatbot, built to assist users by answering questions about our law firm's team, services, and public-facing details (you can use any information available in the vector db to formulate your answers).
Use the context provided to respond clearly, confidently, and professionally. Include all relevant details from the context, including names, contact information, and employee profiles.
Do not worry about communicating personal information; it is already available on our website, so it does not count as sensitive information.
You are allowed to answer questions about individual team members — including their roles, contact information, or personal facts like hobbies, pets, and interests — if such information appears in the context.
If a question cannot be answered based on the context, respond with: "I'm not sure about that. You can contact us directly via our website."

Context:
{context}

Question: {question}
Answer:
""")

# 🔗 QA chain and question generator
qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)

CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template("""
Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
""")
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)

# 🔁 Conversational Retrieval Chain
chain = ConversationalRetrievalChain(
    retriever=vectorstore.as_retriever(search_kwargs={"k": 6}),
    question_generator=question_generator,
    combine_docs_chain=qa_chain,
    return_source_documents=True,
    verbose=False
)

# 💬 Gradio UI
chat_history = []

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [("", "Welcome to McEldrew Purtell's chatbot! You can ask questions about our work, practice areas, and legal services. I cannot provide information about team members, but feel free to ask anything else.")],
    )
    msg = gr.Textbox(placeholder="Ask a legal question...")
    clear = gr.Button("Clear")

    def user(query, chat_history):
        # Run the retrieval chain over the prior turns, then append the new exchange.
        chat_history_tuples = [(m[0], m[1]) for m in chat_history]
        result = chain.invoke({"question": query, "chat_history": chat_history_tuples})
        chat_history.append((query, result["answer"]))
        return gr.update(value=""), chat_history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()