# RagPDF_Reader / app.py
# Turbiling's picture
# Create app.py
# 7646bd1 verified
import os
import gradio as gr
import PyPDF2
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from groq import Groq
# -------- Step 1: Load and Verify Groq API Key --------
# The key is read from the environment (for example a Hugging Face Space
# secret) and is never hard-coded here: assigning a literal to
# os.environ would overwrite a correctly configured key and make the
# check below impossible to fail.
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    raise ValueError(
        "❌ GROQ_API_KEY not found. "
        "Set it as an environment variable before starting the app."
    )
client = Groq(api_key=api_key)

# -------- Step 2: Setup Model --------
# Sentence-embedding model shared by indexing and querying.
model = SentenceTransformer("all-MiniLM-L6-v2")
# State of the most recently processed PDF; both are replaced together by
# process_pdf() and read by answer_question().
faiss_index = None
chunks_list = []
# -------- Step 3: Extract Text from PDF --------
def extract_text_from_pdf(pdf_path, logs):
    """Read all extractable text from the PDF at *pdf_path*.

    Args:
        pdf_path: Filesystem path of the PDF to read.
        logs: Log text accumulated so far; progress and error lines are
            appended to it.

    Returns:
        A ``(text, logs)`` tuple. ``text`` is the text of every page joined
        with newlines, or ``None`` when the file could not be read or
        contained no extractable text (the reason is appended to ``logs``).
    """
    logs += "📖 Step 1: Reading PDF...\n"
    try:
        with open(pdf_path, "rb") as file:
            reader = PyPDF2.PdfReader(file)
            page_texts = [page.extract_text() for page in reader.pages]
        # Join with a newline so the last word of one page does not fuse
        # with the first word of the next; pages without text are skipped.
        text = "\n".join(page_text for page_text in page_texts if page_text)
    except Exception as e:
        # Boundary handler: any read/parse failure is reported to the UI
        # log instead of propagating.
        logs += f"❌ Error during PDF text extraction: {e}\n"
        return None, logs

    if not text.strip():
        logs += (
            "❌ Error during PDF text extraction: "
            "No readable text found in PDF.\n"
        )
        return None, logs

    logs += "✅ Text extracted successfully.\n"
    return text, logs
# -------- Step 4: Chunking --------
def create_chunks(text, chunk_size, logs):
    """Split *text* into consecutive chunks of at most *chunk_size* words.

    Words are obtained by whitespace splitting and re-joined with single
    spaces, so original line breaks and spacing are not preserved.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be >= 1.
        logs: Log text accumulated so far; progress and error lines are
            appended to it.

    Returns:
        A ``(chunks, logs)`` tuple. ``chunks`` is a list of strings (empty
        when *text* has no words), or ``None`` when chunking failed, e.g.
        because *chunk_size* is not a positive integer.
    """
    logs += "🧩 Step 2: Creating text chunks...\n"
    try:
        # A zero step makes range() raise and a negative one silently
        # yields no chunks at all, so reject both with a clear message.
        if chunk_size < 1:
            raise ValueError(
                f"chunk_size must be a positive integer, got {chunk_size!r}"
            )
        words = text.split()
        chunks = [
            " ".join(words[start:start + chunk_size])
            for start in range(0, len(words), chunk_size)
        ]
    except Exception as e:
        logs += f"❌ Error in chunking: {e}\n"
        return None, logs

    logs += f"✅ {len(chunks)} chunks created.\n"
    return chunks, logs
# -------- Step 5: Embeddings & Indexing --------
def embed_chunks(chunks, logs):
    """Embed *chunks* and build a FAISS L2 index over the embeddings.

    Args:
        chunks: List of text chunks to embed with the module-level
            ``model``.
        logs: Log text accumulated so far; progress and error lines are
            appended to it.

    Returns:
        An ``(index, chunks, logs)`` tuple. ``index`` is a
        ``faiss.IndexFlatL2`` whose i-th vector corresponds to
        ``chunks[i]``. On failure (including an empty *chunks* list) both
        ``index`` and ``chunks`` are ``None`` and the reason is appended to
        ``logs``.
    """
    logs += "📊 Step 3: Generating embeddings and creating FAISS index...\n"
    try:
        if not chunks:
            # Without this guard an empty list only fails later with an
            # opaque "tuple index out of range" on embeddings.shape[1].
            raise ValueError("no chunks to embed")
        # FAISS only accepts float32 input; make the dtype explicit rather
        # than relying on the encoder's default output type.
        embeddings = np.array(model.encode(chunks), dtype=np.float32)
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)
    except Exception as e:
        logs += f"❌ Error in embedding/indexing: {e}\n"
        return None, None, logs

    logs += "✅ Embeddings & index created successfully.\n"
    return index, chunks, logs
# -------- Step 6: Process PDF --------
def process_pdf(file, chunk_size=200):
    """Run the full ingestion pipeline for an uploaded PDF.

    Extracts the text, splits it into chunks, embeds them and, only if
    every step succeeded, publishes the result in the module-level
    ``faiss_index`` / ``chunks_list`` used for question answering.

    Args:
        file: The uploaded PDF, either as a filesystem path (``str`` or
            ``os.PathLike``) or as an object exposing the path in a
            ``.name`` attribute. ``None`` means nothing was uploaded.
        chunk_size: Maximum number of words per chunk.

    Returns:
        The accumulated log text describing each step and any error.
    """
    global faiss_index, chunks_list

    if file is None:
        # Clicking "Process" without an upload passes None; report that
        # plainly instead of an AttributeError on `.name`.
        return "❌ No PDF file provided. Please upload a PDF first.\n"

    logs = "📂 File uploaded successfully.\n"
    try:
        # Accept both a plain path and a file-like wrapper carrying `.name`.
        pdf_path = file if isinstance(file, (str, os.PathLike)) else file.name

        text, logs = extract_text_from_pdf(pdf_path, logs)
        if not text:
            return logs

        chunks, logs = create_chunks(text, chunk_size, logs)
        if not chunks:
            return logs

        index, chunks, logs = embed_chunks(chunks, logs)
        if index is None:
            return logs

        # Swap in the new state only after every step succeeded, so a
        # failed upload leaves the previously indexed PDF usable.
        faiss_index = index
        chunks_list = chunks
        logs += "🎉 Step 4: PDF processed and ready for Q&A.\n"
        return logs
    except Exception as e:
        logs += f"❌ Error during processing: {e}\n"
        return logs
# -------- Step 7: Ask Question --------
def answer_question(query, top_k=3):
    """Answer *query* from the most recently processed PDF.

    Retrieves the chunks nearest to the query embedding from the
    module-level FAISS index, sends them together with the question to the
    Groq chat model, and returns the model's reply.

    Args:
        query: The user's question.
        top_k: Maximum number of chunks to retrieve as context.

    Returns:
        On success, the answer followed by a blank line and the step log.
        Otherwise a single error message, or the step log ending with the
        error that occurred.
    """
    if not isinstance(query, str) or not query.strip():
        return "❌ Please enter a question."

    logs = "🤖 Step 5: Processing your question...\n"
    try:
        if faiss_index is None:
            return "❌ Please process a PDF first."

        query_embedding = np.array(model.encode([query]), dtype=np.float32)
        # Do not request more neighbours than there are chunks.
        k = max(1, min(top_k, len(chunks_list)))
        _, neighbor_ids = faiss_index.search(query_embedding, k=k)
        # FAISS pads missing results with -1; without the lower bound that
        # id would index the *last* chunk and pollute the context.
        relevant_chunks = [
            chunks_list[i] for i in neighbor_ids[0] if 0 <= i < len(chunks_list)
        ]
        if not relevant_chunks:
            return "❌ No relevant content found to answer your question."

        context = "\n".join(relevant_chunks)
        prompt = (
            "Answer the question based on the context below:\n\n"
            f"Context:\n{context}\n\nQuestion: {query}"
        )
        logs += "🧠 Sending to Groq LLaMA3 model...\n"
        response = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[{"role": "user", "content": prompt}],
        )
        answer = response.choices[0].message.content
        logs += "✅ Answer generated successfully.\n"
        return f"{answer}\n\n{logs}"
    except Exception as e:
        logs += f"❌ Error during answering: {e}\n"
        return logs
# -------- Step 8: Gradio UI --------
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 RAG PDF Q&A App with Groq + FAISS (Debug-Friendly)")

    # Ingestion: upload a PDF, then build the search index from it.
    with gr.Row():
        # Only offer PDFs in the picker; the pipeline cannot read other types.
        pdf_input = gr.File(
            label="📂 Upload PDF", type="filepath", file_types=[".pdf"]
        )
        process_btn = gr.Button("⚙️ Process PDF")
    log_output = gr.Textbox(label="📝 Logs", lines=20)

    # Q&A: ask a question about the processed PDF.
    with gr.Row():
        question_input = gr.Textbox(label="❓ Ask a Question")
        answer_btn = gr.Button("💬 Get Answer")
    answer_output = gr.Textbox(label="📜 Answer + Logs", lines=20)

    # Event wiring must happen inside the Blocks context.
    process_btn.click(fn=process_pdf, inputs=pdf_input, outputs=log_output)
    answer_btn.click(
        fn=answer_question, inputs=question_input, outputs=answer_output
    )

demo.launch()