# PDF_Chatbot / app.py
import gradio as gr
import PyPDF2
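# NOTE: on newer LangChain releases the two imports below may live in
# langchain_text_splitters / langchain_huggingface (or
# langchain_community.embeddings) instead; the paths used here match
# the classic `langchain` package layout.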
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
import faiss
import numpy as np
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
def answer_from_pdf(pdf_file, user_question):
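    """Answer a question about an uploaded PDF via a minimal RAG loop:
    extract text, chunk it, embed, FAISS-search, then prompt LLaMA."""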
    # ------------------------------
    # 1) PDF Text Extraction
    # ------------------------------
    # gr.File may hand us a path string or a tempfile-like object,
    # depending on the Gradio version, so accept both.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    pdf_reader = PyPDF2.PdfReader(pdf_path)
    raw_text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None on image-only pages
        raw_text += page.extract_text() or ""
    # ------------------------------
    # 2) Chunking with LangChain
    # ------------------------------
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", " ", ""],
    )
    chunks = splitter.split_text(raw_text)
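    # Assumption: bail out early if no extractable text was found
    # (e.g. a scanned, image-only PDF); otherwise FAISS would be
    # handed an empty embedding matrix below.
    if not chunks:
        return "No extractable text found in this PDF."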
    # ------------------------------
    # 3) Embedding & FAISS Indexing
    # ------------------------------
    hf_embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )
    doc_embeddings = hf_embedder.embed_documents(chunks)
    emb_array = np.array(doc_embeddings, dtype=np.float32)
    index = faiss.IndexFlatL2(emb_array.shape[1])
    index.add(emb_array)
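    # Note: IndexFlatL2 ranks by Euclidean distance. If cosine
    # similarity is preferred, normalize the vectors with
    # faiss.normalize_L2 and use IndexFlatIP as a drop-in swap.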
    # ------------------------------
    # 4) Load LLaMA model
    # ------------------------------
    gguf_path = hf_hub_download(
        repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
        filename="Meta-Llama-3.1-8B-Instruct-Q6_K.gguf",
    )
    llama = Llama(model_path=gguf_path, n_ctx=2048)
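    # Performance note: downloading/loading the model (and re-embedding
    # the PDF) on every question is slow; a real deployment would hoist
    # these to module level or cache them per uploaded file.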
    # ------------------------------
    # 5) Retrieval & Prompting
    # ------------------------------
    system_msg = (
        "You are a knowledgeable assistant. "
        "Use the provided context to answer the user's question concisely. "
        "If the answer is not in the context, say \"I don't know.\" "
        "Avoid hallucinations and keep answers under 150 words."
    )
    q_emb = hf_embedder.embed_query(user_question)
    q_vec = np.array(q_emb, dtype=np.float32).reshape(1, -1)
    # Retrieve the nearest chunks; cap k so FAISS never returns
    # placeholder -1 indices when the PDF yields fewer than 3 chunks.
    distances, indices = index.search(q_vec, k=min(3, len(chunks)))
    context_chunks = [chunks[i] for i in indices[0]]
    context_text = "\n\n".join(context_chunks)
    prompt = (
        f"SYSTEM:\n{system_msg}\n\n"
        f"CONTEXT:\n{context_text}\n\n"
        f"USER:\n{user_question}\n\n"
        "ASSISTANT:"
    )
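    # Note: this plain SYSTEM/CONTEXT/USER layout is not the official
    # Llama 3.1 chat template; it works as a simple completion prompt,
    # but llama-cpp-python's create_chat_completion would apply the
    # model's own template if stricter formatting is needed.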
    resp = llama.create_completion(
        prompt=prompt,
        max_tokens=256,
        temperature=0.7,
        stop=["\n\n"],
    )
    return resp["choices"][0]["text"].strip()
# Build a chat interface
with gr.Blocks() as demo:
    gr.Markdown("## PDF RAG Chatbot (LangChain + LLaMA)")
    upload = gr.File(label="Upload PDF")
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(placeholder="Type your question and hit Send")
    send_btn = gr.Button("Send")
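    # Note: the (user, bot) tuple history used below matches the
    # Gradio 3/4 Chatbot default; Gradio 5 prefers type="messages"
    # with role/content dicts instead.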
    def chat_step(pdf_file, message, chat_history):
        # Guard: answering requires an uploaded PDF
        if pdf_file is None:
            response = "Please upload a PDF first."
        else:
            response = answer_from_pdf(pdf_file, message)
        chat_history = chat_history + [(message, response)]
        return chat_history, ""
    send_btn.click(
        chat_step,
        inputs=[upload, user_input, chatbot],
        outputs=[chatbot, user_input],
    )
if __name__ == "__main__":
    demo.launch()