import gradio as gr
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Note: on newer LangChain releases these classes live in the
# langchain_text_splitters / langchain_community packages instead.
from langchain.embeddings import HuggingFaceEmbeddings
import faiss
import numpy as np
from huggingface_hub import hf_hub_download
from llama_cpp import Llama


def answer_from_pdf(pdf_file, user_question):
    # ------------------------------
    # 1) PDF text extraction
    # ------------------------------
    pdf_reader = PyPDF2.PdfReader(pdf_file.name)
    raw_text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None (e.g. image-only pages)
        raw_text += page.extract_text() or ""

    # ------------------------------
    # 2) Chunking with LangChain
    # ------------------------------
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", " ", ""],
    )
    chunks = splitter.split_text(raw_text)

    # ------------------------------
    # 3) Embedding & FAISS indexing
    # ------------------------------
    hf_embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )
    doc_embeddings = hf_embedder.embed_documents(chunks)
    emb_array = np.array(doc_embeddings, dtype=np.float32)
    index = faiss.IndexFlatL2(emb_array.shape[1])
    index.add(emb_array)

    # ------------------------------
    # 4) Load LLaMA model
    # ------------------------------
    gguf_path = hf_hub_download(
        repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
        filename="Meta-Llama-3.1-8B-Instruct-Q6_K.gguf",
    )
    llama = Llama(model_path=gguf_path, n_ctx=2048)

    # ------------------------------
    # 5) Retrieval & prompting
    # ------------------------------
    system_msg = (
        "You are a knowledgeable assistant. "
        "Use the provided context to answer the user's question concisely. "
        "If the answer is not in the provided context, say \"I don't know\". "
        "Avoid hallucinations, and keep answers under 150 words."
    )
    q_emb = hf_embedder.embed_query(user_question)
    q_vec = np.array(q_emb, dtype=np.float32).reshape(1, -1)
    D, I = index.search(q_vec, k=3)
    context_chunks = [chunks[i] for i in I[0]]
    context_text = "\n\n".join(context_chunks)

    prompt = (
        f"SYSTEM:\n{system_msg}\n\n"
        f"CONTEXT:\n{context_text}\n\n"
        f"USER:\n{user_question}\n\n"
        "ASSISTANT:"
    )
    resp = llama.create_completion(
        prompt=prompt,
        max_tokens=256,
        temperature=0.7,
        stop=["\n\n"],
    )
    return resp["choices"][0]["text"].strip()


# Build a chat interface
with gr.Blocks() as demo:
    gr.Markdown("## PDF RAG Chatbot (LangChain + LLaMA)")
    upload = gr.File(label="Upload PDF")
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(placeholder="Type your question and hit Send")
    send_btn = gr.Button("Send")

    def chat_step(pdf_file, message, chat_history):
        response = answer_from_pdf(pdf_file, message)
        chat_history = chat_history + [(message, response)]
        return chat_history, ""

    send_btn.click(
        chat_step,
        inputs=[upload, user_input, chatbot],
        outputs=[chatbot, user_input],
    )

if __name__ == "__main__":
    demo.launch()
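As written, `answer_from_pdf` re-extracts the PDF, re-embeds every chunk, and reloads the multi-gigabyte GGUF model on every question, so each chat turn pays the full startup cost. A minimal sketch of one way to amortize this, caching the embedder and LLM once per process and the FAISS index once per uploaded file path; the helpers `get_models` and `get_index` are hypothetical names, not part of the script above:

```python
from functools import lru_cache


@lru_cache(maxsize=1)
def get_models():
    """Hypothetical helper: load the embedder and LLM once per process."""
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )
    gguf_path = hf_hub_download(
        repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
        filename="Meta-Llama-3.1-8B-Instruct-Q6_K.gguf",
    )
    llama = Llama(model_path=gguf_path, n_ctx=2048)
    return embedder, llama


@lru_cache(maxsize=8)
def get_index(pdf_path):
    """Hypothetical helper: build (chunks, FAISS index) once per PDF path."""
    embedder, _ = get_models()
    reader = PyPDF2.PdfReader(pdf_path)
    text = "".join((page.extract_text() or "") for page in reader.pages)
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", " ", ""],
    )
    chunks = splitter.split_text(text)
    emb = np.array(embedder.embed_documents(chunks), dtype=np.float32)
    index = faiss.IndexFlatL2(emb.shape[1])
    index.add(emb)
    return chunks, index
```

With these helpers, `answer_from_pdf` would start with `embedder, llama = get_models()` and `chunks, index = get_index(pdf_file.name)`: the first question on a new PDF pays the indexing cost, and every later turn only pays for one query embedding and one LLM call.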