import gradio as gr
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
import faiss
import numpy as np
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
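
# answer_from_pdf runs the whole RAG pipeline for a single question: extract the
# PDF text, chunk it, embed and index the chunks with FAISS, retrieve the most
# relevant chunks, and generate an answer with a local Llama 3.1 GGUF model.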
def answer_from_pdf(pdf_file, user_question):
    # ──────────────────────
    # 1) PDF Text Extraction
    # ──────────────────────
    pdf_reader = PyPDF2.PdfReader(pdf_file.name)
    raw_text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None for pages with no extractable text
        raw_text += page.extract_text() or ""

    # ──────────────────────
    # 2) Chunking with LangChain
    # ──────────────────────
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", " ", ""]
    )
    chunks = splitter.split_text(raw_text)

    # ──────────────────────────
    # 3) Embedding & FAISS Indexing
    # ──────────────────────────
    hf_embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )
    doc_embeddings = hf_embedder.embed_documents(chunks)
    emb_array = np.array(doc_embeddings, dtype=np.float32)
    index = faiss.IndexFlatL2(emb_array.shape[1])
    index.add(emb_array)
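    # IndexFlatL2 performs exact (brute-force) L2 search over all chunk vectors,
    # which is fine for the handful of chunks produced by a single PDF.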

    # ──────────────────────────
    # 4) Load LLaMA model
    # ──────────────────────────
    gguf_path = hf_hub_download(
        repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
        filename="Meta-Llama-3.1-8B-Instruct-Q6_K.gguf",
    )
    llama = Llama(model_path=gguf_path, n_ctx=2048)
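    # hf_hub_download caches the GGUF weights locally, so they are only fetched
    # the first time; n_ctx=2048 caps the combined prompt + completion length.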

    # ──────────────────────────
    # 5) Retrieval & Prompting
    # ──────────────────────────
    system_msg = (
        "You are a knowledgeable assistant. "
        "Use the provided context to answer the user's question concisely. "
        "If the answer is not in the provided context, say I don't know. "
        "Avoid hallucinations, and keep answers under 150 words."
    )
    q_emb = hf_embedder.embed_query(user_question)
    q_vec = np.array(q_emb, dtype=np.float32).reshape(1, -1)
    D, I = index.search(q_vec, k=3)
    context_chunks = [chunks[i] for i in I[0]]
    context_text = "\n\n".join(context_chunks)
    prompt = (
        f"SYSTEM:\n{system_msg}\n\n"
        f"CONTEXT:\n{context_text}\n\n"
        f"USER:\n{user_question}\n\n"
        "ASSISTANT:"
    )
    resp = llama.create_completion(
        prompt=prompt,
        max_tokens=256,
        temperature=0.7,
        stop=["\n\n"]
    )
    return resp["choices"][0]["text"].strip()


# Build a chat interface
with gr.Blocks() as demo:
    gr.Markdown("## PDF RAG Chatbot (LangChain + LLaMA)")
    upload = gr.File(label="Upload PDF")
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(placeholder="Type your question and hit Send")
    send_btn = gr.Button("Send")
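    # Each Send click hands the uploaded PDF, the textbox contents, and the
    # current history to chat_step, which appends a (question, answer) pair.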
    def chat_step(pdf_file, message, chat_history):
        response = answer_from_pdf(pdf_file, message)
        chat_history = chat_history + [(message, response)]
        return chat_history, ""

    send_btn.click(
        chat_step,
        inputs=[upload, user_input, chatbot],
        outputs=[chatbot, user_input]
    )

if __name__ == "__main__":
    demo.launch()