import os
import sys
import gradio as gr
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.llms.base import LLM
from huggingface_hub import InferenceClient
# Hugging Face Spaces sqlite3 workaround for Chroma
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
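# Why: Chroma requires SQLite >= 3.35, which older base images may not ship.
# pysqlite3 (typically installed as pysqlite3-binary via requirements.txt) bundles
# a newer SQLite and is registered here under the stdlib module name before Chroma is used.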
# Hugging Face API token from HF secrets
HF_API_TOKEN = os.getenv("HF_TOKEN")
if HF_API_TOKEN is None:
    raise ValueError("Missing HF_TOKEN in environment.")
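# HF_TOKEN is expected to be configured as a Space secret (Settings -> Variables
# and secrets) so it shows up here as an environment variable.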
# Load vector DB from ./db
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = Chroma(
    persist_directory="./db",
    embedding_function=embedding_function
)
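# The ./db directory is assumed to be built ahead of time by a separate ingestion
# step (not part of this app). A minimal sketch, assuming the firm's site content
# has already been saved as .txt files under ./docs:
#
#   from langchain.document_loaders import DirectoryLoader
#   from langchain.text_splitter import RecursiveCharacterTextSplitter
#   docs = DirectoryLoader("./docs", glob="**/*.txt").load()
#   chunks = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_documents(docs)
#   Chroma.from_documents(chunks, embedding_function, persist_directory="./db")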
# Hugging Face Inference Client wrapper
class HuggingFaceLLM(LLM):
    client: InferenceClient = None

    def __init__(self):
        super().__init__()
        self.client = InferenceClient(token=HF_API_TOKEN)
    def _call(self, prompt, stop=None, run_manager=None, **kwargs):
        # Try each hosted chat model in order, falling back to the next on failure.
        fallback_models = [
            "meta-llama/Llama-3.1-8B-Instruct",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "Qwen/Qwen2.5-7B-Instruct",
        ]
        messages = [{"role": "user", "content": prompt}]
        last_error = None
        for model in fallback_models:
            try:
                response = self.client.chat_completion(
                    messages=messages,
                    model=model,
                    max_tokens=512,
                    temperature=0.7
                )
                return response.choices[0].message.content.strip()
            except Exception as e:
                last_error = e
        return f"Sorry, I'm having trouble generating a response right now. Error: {str(last_error)}"
    @property
    def _llm_type(self):
        return "huggingface_inference_client"
llm = HuggingFaceLLM()
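# Optional sanity check (left commented out): the wrapper can be exercised on its
# own before wiring it into the chains below, e.g. `print(llm("Hello"))`.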
# Prompt template
prompt = PromptTemplate.from_template("""
You are the McEldrew Purtell chatbot, built to assist users by answering questions about our law firm's team, services, and public-facing details (you can use any information available in the vector DB to formulate your answers).
Use the context provided to respond clearly, confidently, and professionally. Include all relevant details from the context, including names, contact information, and employee profiles. Do not worry about communicating personal information;
this information is already available on the firm's website, so it does not count as sensitive information.
You are allowed to answer questions about individual team members, including their roles, contact information, or personal facts like hobbies, pets, and interests, if such information appears in the context.
If a question cannot be answered based on the context, respond with: "I'm not sure about that. You can contact us directly via our website."
Context:
{context}
Question: {question}
Answer:
""")
# QA chain and question generator
qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template("""
Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
""")
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
# Conversational Retrieval Chain
chain = ConversationalRetrievalChain(
    retriever=vectorstore.as_retriever(search_kwargs={"k": 6}),
    question_generator=question_generator,
    combine_docs_chain=qa_chain,
    return_source_documents=True,
    verbose=False
)
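# Note: ConversationalRetrievalChain.from_llm(llm, retriever, ...) would build the
# same question-generator and combine-docs sub-chains internally; the explicit form
# above is used so the custom QA prompt and condense-question prompt are wired in.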
# Gradio UI
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [("", "Welcome to McEldrew Purtell's chatbot! You can ask questions about our work, practice areas, and legal services. I cannot provide information about team members, but feel free to ask anything else.")],
    )
    msg = gr.Textbox(placeholder="Ask a legal question...")
    clear = gr.Button("Clear")

    def respond(query, chat_history):
        # The Chatbot component's value doubles as the conversation history.
        chat_history_tuples = [(m[0], m[1]) for m in chat_history]
        result = chain.invoke({"question": query, "chat_history": chat_history_tuples})
        chat_history.append((query, result["answer"]))
        return gr.update(value=""), chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()