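"""Gradio chatbot for the McEldrew Purtell law firm.

Retrieval-augmented QA over a persisted Chroma vector store (./db) built with
sentence-transformers/all-MiniLM-L6-v2 embeddings. Responses are generated by
hosted chat models on the Hugging Face Inference API (Llama 3.1 8B Instruct,
falling back to Mistral 7B and Qwen 2.5 7B), wired together with a LangChain
ConversationalRetrievalChain and served through a Gradio Blocks UI.
"""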
import os
import sys
import gradio as gr
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.llms.base import LLM
from huggingface_hub import InferenceClient

# 👇 Hugging Face sqlite3 workaround for Chroma
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

# 🔐 Hugging Face API token from HF secrets
HF_API_TOKEN = os.getenv("HF_TOKEN")
if HF_API_TOKEN is None:
    raise ValueError("Missing HF_TOKEN in environment.")

# 🧠 Load vector DB from ./db
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = Chroma(
    persist_directory="./db",
    embedding_function=embedding_function
)

# 🔷 Hugging Face Inference Client wrapper
class HuggingFaceLLM(LLM):
    client: InferenceClient = None

    def __init__(self):
        super().__init__()
        self.client = InferenceClient(token=HF_API_TOKEN)

    def _call(self, prompt, stop=None, run_manager=None, **kwargs):
        try:
            # Primary model: Llama 3.1 8B Instruct via the HF Inference API
            messages = [{"role": "user", "content": prompt}]
            response = self.client.chat_completion(
                messages=messages,
                model="meta-llama/Llama-3.1-8B-Instruct",
                max_tokens=512,
                temperature=0.7
            )
            return response.choices[0].message.content.strip()
        except Exception:
            try:
                # First fallback: Mistral 7B Instruct
                messages = [{"role": "user", "content": prompt}]
                response = self.client.chat_completion(
                    messages=messages,
                    model="mistralai/Mistral-7B-Instruct-v0.3",
                    max_tokens=512,
                    temperature=0.7
                )
                return response.choices[0].message.content.strip()
            except Exception:
                try:
                    # Final fallback: Qwen 2.5 7B Instruct
                    messages = [{"role": "user", "content": prompt}]
                    response = self.client.chat_completion(
                        messages=messages,
                        model="Qwen/Qwen2.5-7B-Instruct",
                        max_tokens=512,
                        temperature=0.7
                    )
                    return response.choices[0].message.content.strip()
                except Exception as e3:
                    return f"Sorry, I'm having trouble generating a response right now. Error: {str(e3)}"
    @property
    def _llm_type(self):
        return "huggingface_inference_client"

llm = HuggingFaceLLM()

# ✨ Prompt template
prompt = PromptTemplate.from_template("""
You are the McEldrew Purtell chatbot, built to assist users by answering questions about our law firm's team, services, and public-facing details (you can use any information available in the vector db to formulate your answers).
Use the context provided to respond clearly, confidently, and professionally. Include all relevant details from the context, including names, contact information, and employee profiles.
Do not worry about communicating personal information; it is already available on our website, so it does not count as sensitive information.
You are allowed to answer questions about individual team members — including their roles, contact information, or personal facts like hobbies, pets, and interests — if such information appears in the context.
If a question cannot be answered based on the context, respond with: "I'm not sure about that. You can contact us directly via our website."

Context:
{context}

Question: {question}
Answer:
""")

# 🔗 QA chain and question generator
qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)

CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template("""
Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
""")
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)

# 🔁 Conversational Retrieval Chain
chain = ConversationalRetrievalChain(
    retriever=vectorstore.as_retriever(search_kwargs={"k": 6}),
    question_generator=question_generator,
    combine_docs_chain=qa_chain,
    return_source_documents=True,
    verbose=False
)

# 💬 Gradio UI
chat_history = []

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [("", "Welcome to McEldrew Purtell's chatbot! You can ask questions about our work, practice areas, and legal services. I cannot provide information about team members, but feel free to ask anything else.")],
    )
    msg = gr.Textbox(placeholder="Ask a legal question...")
    clear = gr.Button("Clear")

    def user(query, chat_history):
        # Run the retrieval chain over the prior turns, then append the new exchange.
        chat_history_tuples = [(m[0], m[1]) for m in chat_history]
        result = chain.invoke({"question": query, "chat_history": chat_history_tuples})
        chat_history.append((query, result["answer"]))
        return gr.update(value=""), chat_history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()