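# McEldrew Purtell RAG chatbot: Gradio UI + LangChain ConversationalRetrievalChain over a
# persisted Chroma vector store, with generation via the Hugging Face Inference API.
# The commented-out block below is the earlier DeepSeek-based implementation, kept for reference.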
# import os
# import sys
# import requests
# import gradio as gr
# from langchain.chains import ConversationalRetrievalChain, LLMChain
# from langchain.vectorstores import Chroma
# from langchain.embeddings import HuggingFaceEmbeddings
# from langchain.prompts import PromptTemplate
# from langchain.chains.question_answering import load_qa_chain
# from langchain.llms.base import LLM
# # Hugging Face sqlite3 workaround
# __import__('pysqlite3')
# sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
# # Load DeepSeek API key from Hugging Face secrets
# DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY")
# if DEEPSEEK_API_KEY is None:
# raise ValueError("Missing DEEPSEEK_API_KEY in environment.")
# # Load vector DB from ./db
# embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# vectorstore = Chroma(
# persist_directory="./db",
# embedding_function=embedding_function
# )
# # Wrap DeepSeek into LangChain LLM
# class DeepSeekLLM(LLM):
# api_key: str = DEEPSEEK_API_KEY
# def _call(self, prompt, stop=None, run_manager=None, **kwargs):
# headers = {
# "Authorization": f"Bearer {self.api_key}",
# "Content-Type": "application/json"
# }
# payload = {
# "model": "deepseek-chat",
# "messages": [
# {"role": "system", "content": "You are a helpful assistant."},
# {"role": "user", "content": prompt}
# ],
# "temperature": 0.7,
# "max_tokens": 512
# }
# response = requests.post("https://api.deepseek.com/v1/chat/completions", headers=headers, json=payload)
# response.raise_for_status()
# return response.json()["choices"][0]["message"]["content"].strip()
# @property
# def _llm_type(self):
# return "deepseek_api"
# llm = DeepSeekLLM()
# # Prompt template
# prompt = PromptTemplate.from_template("""
# You are the McEldrew Purtell chatbot, built to answer questions about our law firm's team, services, and other public-facing details (you may use any information available in the vector DB to formulate your answers).
# Use the provided context to respond clearly, confidently, and professionally. Include all relevant details from the context, including names, contact information, and employee profiles. Do not worry about sharing personal information:
# it is already published on the firm's website, so it does not count as sensitive.
# You are allowed to answer questions about individual team members, including their roles, contact information, or personal facts such as hobbies, pets, and interests, if such information appears in the context.
# If a question cannot be answered based on the context, respond with: "I'm not sure about that. You can contact us directly via our website."
# Context:
# {context}
# Question: {question}
# Answer:
# """)
# # QA chain and rephraser
# qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)
# CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template("""
# Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question.
# Chat History:
# {chat_history}
# Follow Up Input: {question}
# Standalone question:
# """)
# question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
# # Full chain
# chain = ConversationalRetrievalChain(
# retriever=vectorstore.as_retriever(search_kwargs={"k": 6}),
# question_generator=question_generator,
# combine_docs_chain=qa_chain,
# return_source_documents=True,
# verbose=False
# )
# # Gradio UI
# chat_history = []
# with gr.Blocks() as demo:
# chatbot = gr.Chatbot(
# [("", "Welcome to McEldrew Purtell's chatbot! You can ask questions about our team, practice areas, and legal services.")],
# )
# msg = gr.Textbox(placeholder="Ask a legal question...")
# clear = gr.Button("Clear")
# def user(query, chat_history):
# chat_history_tuples = [(m[0], m[1]) for m in chat_history]
# result = chain.invoke({"question": query, "chat_history": chat_history_tuples})
# chat_history.append((query, result["answer"]))
# return gr.update(value=""), chat_history
# msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
# clear.click(lambda: None, None, chatbot, queue=False)
# demo.launch()
import os
import sys
import gradio as gr
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.llms.base import LLM
from huggingface_hub import InferenceClient
# Hugging Face sqlite3 workaround for Chroma
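# chromadb requires a newer sqlite3 than the system build on Hugging Face Spaces, so the
# pysqlite3 package (from pysqlite3-binary) is aliased in as sqlite3 before Chroma is used.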
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
# Hugging Face API token from HF secrets
HF_API_TOKEN = os.getenv("HF_TOKEN")
if HF_API_TOKEN is None:
    raise ValueError("Missing HF_TOKEN in environment.")
# Load vector DB from ./db
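# Note: the embedding model here must match the one used when the ./db index was built,
# otherwise query vectors will not line up with the stored document vectors.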
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = Chroma(
    persist_directory="./db",
    embedding_function=embedding_function
)
# Hugging Face Inference Client wrapper
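# Minimal custom LangChain LLM: subclasses of LLM only need to implement _call() and _llm_type.
# _call tries a chain of hosted chat models (Llama 3.1 8B, then Mistral 7B, then Qwen2.5 7B),
# falling back to the next model if a call raises, and returning an apology string if all three fail.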
class HuggingFaceLLM(LLM):
    client: InferenceClient = None

    def __init__(self):
        super().__init__()
        self.client = InferenceClient(token=HF_API_TOKEN)

    def _call(self, prompt, stop=None, run_manager=None, **kwargs):
        try:
            # Use chat completion with a working model from the list
            messages = [{"role": "user", "content": prompt}]
            response = self.client.chat_completion(
                messages=messages,
                model="meta-llama/Llama-3.1-8B-Instruct",
                max_tokens=512,
                temperature=0.7
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            try:
                # Fallback to Mistral
                messages = [{"role": "user", "content": prompt}]
                response = self.client.chat_completion(
                    messages=messages,
                    model="mistralai/Mistral-7B-Instruct-v0.3",
                    max_tokens=512,
                    temperature=0.7
                )
                return response.choices[0].message.content.strip()
            except Exception as e2:
                try:
                    # Final fallback to Qwen
                    messages = [{"role": "user", "content": prompt}]
                    response = self.client.chat_completion(
                        messages=messages,
                        model="Qwen/Qwen2.5-7B-Instruct",
                        max_tokens=512,
                        temperature=0.7
                    )
                    return response.choices[0].message.content.strip()
                except Exception as e3:
                    return f"Sorry, I'm having trouble generating a response right now. Error: {str(e3)}"

    @property
    def _llm_type(self):
        return "huggingface_inference_client"
llm = HuggingFaceLLM()
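# Quick local sanity check (illustrative only; on the legacy LangChain API used here the LLM
# instance is directly callable):
# print(llm("Introduce yourself in one sentence."))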
# Prompt template
prompt = PromptTemplate.from_template("""
You are the McEldrew Purtell chatbot, built to answer questions about our law firm's team, services, and other public-facing details (you may use any information available in the vector DB to formulate your answers).
Use the provided context to respond clearly, confidently, and professionally. Include all relevant details from the context, including names, contact information, and employee profiles. Do not worry about sharing personal information:
it is already published on the firm's website, so it does not count as sensitive.
You are allowed to answer questions about individual team members, including their roles, contact information, or personal facts such as hobbies, pets, and interests, if such information appears in the context.
If a question cannot be answered based on the context, respond with: "I'm not sure about that. You can contact us directly via our website."
Context:
{context}
Question: {question}
Answer:
""")
# QA chain and question generator
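# The "stuff" chain type simply concatenates all retrieved documents into the {context} slot of
# the prompt above, so the retrieved chunks must fit within the model's context window.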
qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template("""
Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
""")
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
# Conversational Retrieval Chain
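# Flow per turn: condense the question, retrieve the top k=6 chunks from Chroma, then answer with
# the "stuff" QA chain; source documents are returned alongside the answer.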
chain = ConversationalRetrievalChain(
    retriever=vectorstore.as_retriever(search_kwargs={"k": 6}),
    question_generator=question_generator,
    combine_docs_chain=qa_chain,
    return_source_documents=True,
    verbose=False
)
# Gradio UI
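# Blocks layout: a Chatbot seeded with a welcome message, a Textbox for questions, and a Clear
# button. The user() callback runs the chain on each submitted question and appends the
# (question, answer) pair to the visible chat history.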
chat_history = []
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [("", "Welcome to McEldrew Purtell's chatbot! You can ask questions about our work, practice areas, and legal services. I cannot provide information about team members, but feel free to ask anything else.")],
    )
    msg = gr.Textbox(placeholder="Ask a legal question...")
    clear = gr.Button("Clear")

    def user(query, chat_history):
        chat_history_tuples = [(m[0], m[1]) for m in chat_history]
        result = chain.invoke({"question": query, "chat_history": chat_history_tuples})
        chat_history.append((query, result["answer"]))
        return gr.update(value=""), chat_history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)
demo.launch()
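# Likely runtime dependencies for this Space (assumed, not pinned here): gradio, langchain,
# chromadb, sentence-transformers, huggingface_hub, and pysqlite3-binary.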