Spaces:

Turbiling
/

RagPDF_Reader

Runtime error

App Files Files Community

RagPDF_Reader / app.py

Turbiling

Create app.py

7646bd1 verified 3 months ago

raw

history blame contribute delete

4.86 kB

	import os
	import gradio as gr
	import PyPDF2
	import faiss
	import numpy as np
	from sentence_transformers import SentenceTransformer
	from groq import Groq

	# -------- Step 1: Load and Verify Groq API Key --------
	# The key must be supplied through the environment (for example as a secret
	# of the hosting service). Writing a placeholder into os.environ here would
	# overwrite the real key and make the "not found" check below unreachable.
	api_key = os.getenv("GROQ_API_KEY", "").strip()
	if not api_key:
	    raise ValueError("❌ GROQ_API_KEY not found.")

	client = Groq(api_key=api_key)

	# -------- Step 2: Setup Model --------
	# Sentence-embedding model used for both the PDF chunks and the questions.
	model = SentenceTransformer("all-MiniLM-L6-v2")
	# Module-level state: written by process_pdf(), read by answer_question().
	faiss_index = None
	chunks_list = []

	# -------- Step 3: Extract Text from PDF --------
	def extract_text_from_pdf(pdf_path, logs):
	    """Read every page of a PDF and return its text plus the updated log.

	    Args:
	        pdf_path: Path of the PDF file to open.
	        logs: Log text accumulated so far; progress lines are appended to it.

	    Returns:
	        ``(text, logs)`` on success, or ``(None, logs)`` when the file cannot
	        be read or yields no text. Failures are reported in the log instead
	        of being raised.
	    """
	    try:
	        logs += "📖 Step 1: Reading PDF...\n"
	        with open(pdf_path, 'rb') as file:
	            reader = PyPDF2.PdfReader(file)
	            # Extract while the file is still open.
	            page_texts = [page.extract_text() for page in reader.pages]
	        # Join pages with a newline: plain concatenation would fuse the last
	        # word of one page with the first word of the next, which corrupts
	        # the whitespace-based chunking done later.
	        text = "\n".join(page_text for page_text in page_texts if page_text)
	        if not text.strip():
	            raise ValueError("❌ No readable text found in PDF.")
	        logs += "✅ Text extracted successfully.\n"
	        return text, logs
	    except Exception as e:
	        # UI boundary: surface the problem in the log shown to the user.
	        logs += f"❌ Error during PDF text extraction: {str(e)}\n"
	        return None, logs

	# -------- Step 4: Chunking --------
	def create_chunks(text, chunk_size, logs):
	    """Split text into consecutive chunks of at most ``chunk_size`` words.

	    Args:
	        text: Text to split; words are separated on whitespace.
	        chunk_size: Maximum number of words per chunk; must be at least 1.
	        logs: Log text accumulated so far; progress lines are appended to it.

	    Returns:
	        ``(chunks, logs)`` where ``chunks`` is a list of strings, or
	        ``(None, logs)`` if chunking failed (the reason is in the log).
	    """
	    try:
	        logs += "🧩 Step 2: Creating text chunks...\n"
	        # A negative step makes range() empty (silently zero chunks) and a
	        # zero step raises a cryptic error, so reject both explicitly.
	        if chunk_size < 1:
	            raise ValueError(
	                f"chunk_size must be a positive integer, got {chunk_size!r}"
	            )
	        words = text.split()
	        chunks = [
	            ' '.join(words[start:start + chunk_size])
	            for start in range(0, len(words), chunk_size)
	        ]
	        logs += f"✅ {len(chunks)} chunks created.\n"
	        return chunks, logs
	    except Exception as e:
	        logs += f"❌ Error in chunking: {str(e)}\n"
	        return None, logs

	# -------- Step 5: Embeddings & Indexing --------
	def embed_chunks(chunks, logs):
	    """Encode the chunks and load the vectors into a new FAISS L2 index.

	    Args:
	        chunks: Text chunks to encode with the module-level ``model``.
	        logs: Log text accumulated so far; progress lines are appended to it.

	    Returns:
	        ``(index, chunks, logs)`` on success, or ``(None, None, logs)`` if
	        encoding or indexing raised (the reason is in the log).
	    """
	    try:
	        logs = logs + "📊 Step 3: Generating embeddings and creating FAISS index...\n"
	        vectors = model.encode(chunks)
	        # The index dimensionality is the width of one embedding row.
	        dimension = vectors.shape[1]
	        flat_index = faiss.IndexFlatL2(dimension)
	        flat_index.add(np.array(vectors))
	    except Exception as err:
	        logs = logs + f"❌ Error in embedding/indexing: {str(err)}\n"
	        return None, None, logs
	    else:
	        logs = logs + "✅ Embeddings & index created successfully.\n"
	        return flat_index, chunks, logs

	# -------- Step 6: Process PDF --------
	def process_pdf(file, chunk_size=200):
	    """Run the extract -> chunk -> embed pipeline for an uploaded PDF.

	    On success the module-level ``faiss_index`` and ``chunks_list`` are
	    replaced with the new index and its chunks.

	    Args:
	        file: The upload, either a path string or an object whose ``name``
	            attribute is the path; ``None`` means nothing was uploaded.
	        chunk_size: Maximum number of words per chunk.

	    Returns:
	        The log text describing each step, including any error.
	    """
	    global faiss_index, chunks_list
	    if file is None:
	        return "❌ No file uploaded. Please upload a PDF first.\n"

	    logs = "📂 File uploaded successfully.\n"
	    try:
	        # The upload widget is declared with type="filepath", which hands
	        # over a plain path string; file-like uploads expose the path as
	        # ``.name`` instead, so support both.
	        pdf_path = file if isinstance(file, str) else file.name

	        text, logs = extract_text_from_pdf(pdf_path, logs)
	        if not text:
	            return logs

	        chunks, logs = create_chunks(text, chunk_size, logs)
	        if not chunks:
	            return logs

	        index, chunks, logs = embed_chunks(chunks, logs)
	        # Compare against None explicitly instead of relying on the
	        # truthiness of the index object.
	        if index is None:
	            return logs

	        faiss_index = index
	        chunks_list = chunks
	        logs += "🎉 Step 4: PDF processed and ready for Q&A.\n"
	        return logs
	    except Exception as e:
	        # UI boundary: report unexpected failures in the log box.
	        logs += f"❌ Error during processing: {str(e)}\n"
	        return logs

	# -------- Step 7: Ask Question --------
	def answer_question(query):
	    """Answer a question using the chunks most similar to it as context.

	    Args:
	        query: The user's question.

	    Returns:
	        The model's answer followed by the step log, or a message starting
	        with "❌" when the question is empty, no PDF has been processed,
	        nothing relevant was found, or an error occurred.
	    """
	    if not query or not query.strip():
	        return "❌ Please enter a question."

	    logs = "🤖 Step 5: Processing your question...\n"
	    try:
	        if faiss_index is None or not chunks_list:
	            return "❌ Please process a PDF first."

	        query_embedding = model.encode([query])
	        # Never request more neighbours than there are stored chunks.
	        top_k = min(3, len(chunks_list))
	        _, neighbour_ids = faiss_index.search(np.array(query_embedding), k=top_k)
	        # FAISS pads missing results with -1; without the lower bound a -1
	        # would silently select the last chunk through negative indexing.
	        relevant_chunks = [
	            chunks_list[i] for i in neighbour_ids[0] if 0 <= i < len(chunks_list)
	        ]

	        if not relevant_chunks:
	            return "❌ No relevant content found to answer your question."

	        context = "\n".join(relevant_chunks)
	        prompt = f"Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {query}"

	        logs += "🧠 Sending to Groq LLaMA3 model...\n"
	        # NOTE(review): confirm this model ID is still served by the
	        # provider; hosted model IDs get retired over time.
	        response = client.chat.completions.create(
	            model="llama3-70b-8192",
	            messages=[{"role": "user", "content": prompt}]
	        )

	        answer = response.choices[0].message.content
	        logs += "✅ Answer generated successfully.\n"
	        return f"{answer}\n\n{logs}"

	    except Exception as e:
	        # UI boundary: show the failure to the user instead of crashing.
	        logs += f"❌ Error during answering: {str(e)}\n"
	        return logs

	# -------- Step 8: Gradio UI --------
	with gr.Blocks() as demo:
	    gr.Markdown("## 🤖 RAG PDF Q&A App with Groq + FAISS (Debug-Friendly)")

	    # NOTE(review): the original indentation was lost; this layout assumes
	    # each run of three components belongs to the gr.Row() above it.
	    # Upload row: choose a PDF, trigger processing, read the step log.
	    with gr.Row():
	        pdf_input = gr.File(type="filepath", label="📂 Upload PDF")
	        process_btn = gr.Button("⚙️ Process PDF")
	        log_output = gr.Textbox(lines=20, label="📝 Logs")

	    # Question row: type a question, trigger the answer, read the result.
	    with gr.Row():
	        question_input = gr.Textbox(label="❓ Ask a Question")
	        answer_btn = gr.Button("💬 Get Answer")
	        answer_output = gr.Textbox(lines=20, label="📜 Answer + Logs")

	    # Wire each button to its handler.
	    process_btn.click(process_pdf, inputs=pdf_input, outputs=log_output)
	    answer_btn.click(answer_question, inputs=question_input, outputs=answer_output)

	demo.launch()