import gradio as gr
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Note: on newer LangChain releases these classes live in the
# langchain_text_splitters / langchain_community packages instead.
from langchain.embeddings import HuggingFaceEmbeddings
import faiss
import numpy as np
from huggingface_hub import hf_hub_download
from llama_cpp import Llama


def answer_from_pdf(pdf_file, user_question):
    # ------------------------------
    # 1) PDF text extraction
    # ------------------------------
    pdf_reader = PyPDF2.PdfReader(pdf_file.name)
    raw_text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None (e.g. image-only pages)
        raw_text += page.extract_text() or ""

    # ------------------------------
    # 2) Chunking with LangChain
    # ------------------------------
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", " ", ""],
    )
    chunks = splitter.split_text(raw_text)

    # ------------------------------
    # 3) Embedding & FAISS indexing
    # ------------------------------
    hf_embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )
    doc_embeddings = hf_embedder.embed_documents(chunks)
    emb_array = np.array(doc_embeddings, dtype=np.float32)
    index = faiss.IndexFlatL2(emb_array.shape[1])
    index.add(emb_array)

    # ------------------------------
    # 4) Load LLaMA model
    # ------------------------------
    gguf_path = hf_hub_download(
        repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
        filename="Meta-Llama-3.1-8B-Instruct-Q6_K.gguf",
    )
    llama = Llama(model_path=gguf_path, n_ctx=2048)

    # ------------------------------
    # 5) Retrieval & prompting
    # ------------------------------
    system_msg = (
        "You are a knowledgeable assistant. "
        "Use the provided context to answer the user's question concisely. "
        "If the answer is not in the provided context, say \"I don't know\". "
        "Avoid hallucinations, and keep answers under 150 words."
    )
    q_emb = hf_embedder.embed_query(user_question)
    q_vec = np.array(q_emb, dtype=np.float32).reshape(1, -1)
    D, I = index.search(q_vec, k=3)
    context_chunks = [chunks[i] for i in I[0]]
    context_text = "\n\n".join(context_chunks)

    prompt = (
        f"SYSTEM:\n{system_msg}\n\n"
        f"CONTEXT:\n{context_text}\n\n"
        f"USER:\n{user_question}\n\n"
        "ASSISTANT:"
    )
    resp = llama.create_completion(
        prompt=prompt,
        max_tokens=256,
        temperature=0.7,
        stop=["\n\n"],
    )
    return resp["choices"][0]["text"].strip()


# Build a chat interface
with gr.Blocks() as demo:
    gr.Markdown("## PDF RAG Chatbot (LangChain + LLaMA)")
    upload = gr.File(label="Upload PDF")
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(placeholder="Type your question and hit Send")
    send_btn = gr.Button("Send")

    def chat_step(pdf_file, message, chat_history):
        response = answer_from_pdf(pdf_file, message)
        chat_history = chat_history + [(message, response)]
        return chat_history, ""

    send_btn.click(
        chat_step,
        inputs=[upload, user_input, chatbot],
        outputs=[chatbot, user_input],
    )

if __name__ == "__main__":
    demo.launch()
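As written, `answer_from_pdf` re-extracts the PDF, re-embeds every chunk, and reloads the multi-gigabyte GGUF model on every question, so each chat turn pays the full startup cost. A minimal sketch of one way to amortize this, caching the embedder and LLM once per process and the FAISS index once per uploaded file path; the helpers `get_models` and `get_index` are hypothetical names, not part of the script above:

```python
from functools import lru_cache


@lru_cache(maxsize=1)
def get_models():
    """Hypothetical helper: load the embedder and LLM once per process."""
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )
    gguf_path = hf_hub_download(
        repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
        filename="Meta-Llama-3.1-8B-Instruct-Q6_K.gguf",
    )
    llama = Llama(model_path=gguf_path, n_ctx=2048)
    return embedder, llama


@lru_cache(maxsize=8)
def get_index(pdf_path):
    """Hypothetical helper: build (chunks, FAISS index) once per PDF path."""
    embedder, _ = get_models()
    reader = PyPDF2.PdfReader(pdf_path)
    text = "".join((page.extract_text() or "") for page in reader.pages)
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", " ", ""],
    )
    chunks = splitter.split_text(text)
    emb = np.array(embedder.embed_documents(chunks), dtype=np.float32)
    index = faiss.IndexFlatL2(emb.shape[1])
    index.add(emb)
    return chunks, index
```

With these helpers, `answer_from_pdf` would start with `embedder, llama = get_models()` and `chunks, index = get_index(pdf_file.name)`: the first question on a new PDF pays the indexing cost, and every later turn only pays for one query embedding and one LLM call.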