import gradio as gr
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
import faiss
import numpy as np
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
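# Expected dependencies (a reasonable requirements.txt for this Space, versions unpinned):
#   gradio, PyPDF2, langchain, sentence-transformers, faiss-cpu, numpy,
#   huggingface_hub, llama-cpp-python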
def answer_from_pdf(pdf_file, user_question):
    # ──────────────────────
    # 1) PDF Text Extraction
    # ──────────────────────
    pdf_reader = PyPDF2.PdfReader(pdf_file.name)
    raw_text = ""
    for page in pdf_reader.pages:
        raw_text += page.extract_text() or ""  # extract_text() can return None
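    # Note: PyPDF2 only extracts embedded text; scanned/image-only PDFs will
    # yield empty strings here unless they are OCR'd first.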
    # ──────────────────────
    # 2) Chunking with LangChain
    # ──────────────────────
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", " ", ""]
    )
    chunks = splitter.split_text(raw_text)
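    # Note: chunks are ~1000 characters, but all-MiniLM-L6-v2 truncates its input
    # at 256 word pieces, so the tail of longer chunks may not contribute to the
    # embedding.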
    # ──────────────────────────
    # 3) Embedding & FAISS Indexing
    # ──────────────────────────
    hf_embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )
    doc_embeddings = hf_embedder.embed_documents(chunks)
    emb_array = np.array(doc_embeddings, dtype=np.float32)
    index = faiss.IndexFlatL2(emb_array.shape[1])
    index.add(emb_array)
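    # IndexFlatL2 ranks chunks by exact Euclidean distance on the raw embeddings;
    # an alternative (not used here) is to L2-normalize the vectors and use
    # IndexFlatIP, which makes the search equivalent to cosine similarity.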
    # ──────────────────────────
    # 4) Load LLaMA model
    # ──────────────────────────
    gguf_path = hf_hub_download(
        repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
        filename="Meta-Llama-3.1-8B-Instruct-Q6_K.gguf",
    )
    llama = Llama(model_path=gguf_path, n_ctx=2048)
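    # Note: this reloads the ~6 GB Q6_K GGUF on every question. hf_hub_download
    # caches the file locally, but constructing Llama() each call is still slow.
    # A common optimization (a sketch, not part of the original Space) is to keep
    # one module-level instance, e.g.:
    #
    #   _llama = None
    #   def get_llama():
    #       global _llama
    #       if _llama is None:
    #           path = hf_hub_download(
    #               repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
    #               filename="Meta-Llama-3.1-8B-Instruct-Q6_K.gguf",
    #           )
    #           _llama = Llama(model_path=path, n_ctx=2048)
    #       return _llama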
    # ──────────────────────────
    # 5) Retrieval & Prompting
    # ──────────────────────────
    system_msg = (
        "You are a knowledgeable assistant. "
        "Use the provided context to answer the user's question concisely. "
        "If the answer is not in the provided context, say \"I don't know.\" "
        "Avoid hallucinations, and keep answers under 150 words."
    )
    q_emb = hf_embedder.embed_query(user_question)
    q_vec = np.array(q_emb, dtype=np.float32).reshape(1, -1)
    D, I = index.search(q_vec, k=3)  # distances and indices of the 3 nearest chunks
    context_chunks = [chunks[i] for i in I[0]]
    context_text = "\n\n".join(context_chunks)
    prompt = (
        f"SYSTEM:\n{system_msg}\n\n"
        f"CONTEXT:\n{context_text}\n\n"
        f"USER:\n{user_question}\n\n"
        "ASSISTANT:"
    )
    resp = llama.create_completion(
        prompt=prompt,
        max_tokens=256,
        temperature=0.7,
        stop=["\n\n"]
    )
    return resp["choices"][0]["text"].strip()
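# Note: the prompt above uses a plain SYSTEM/CONTEXT/USER layout rather than the
# Llama 3.1 Instruct chat template. llama-cpp-python can apply the template baked
# into the GGUF via create_chat_completion(messages=[...]), which tends to follow
# the system instructions more reliably. Also, stop=["\n\n"] ends generation at
# the first blank line, so multi-paragraph answers are cut short.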
# Build a chat interface
with gr.Blocks() as demo:
    gr.Markdown("## PDF RAG Chatbot (LangChain + LLaMA)")
    upload = gr.File(label="Upload PDF")
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(placeholder="Type your question and hit Send")
    send_btn = gr.Button("Send")

    def chat_step(pdf_file, message, chat_history):
        response = answer_from_pdf(pdf_file, message)
        chat_history = chat_history + [(message, response)]
        return chat_history, ""
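    # Note: chat_history is only used for display; answer_from_pdf() sees just the
    # latest question, so follow-up questions are answered without conversation
    # memory, and the PDF is re-indexed on every turn.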
    send_btn.click(
        chat_step,
        inputs=[upload, user_input, chatbot],
        outputs=[chatbot, user_input]
    )

if __name__ == "__main__":
    demo.launch()