"""RAG PDF Q&A app: extract PDF text, chunk it, index with FAISS, answer via Groq LLaMA3.

Pipeline: upload PDF -> extract text (PyPDF2) -> split into word chunks ->
embed (sentence-transformers) -> FAISS L2 index -> retrieve top-3 chunks for a
question -> prompt Groq's LLaMA3 with the retrieved context. A Gradio UI wires
the two entry points (`process_pdf`, `answer_question`) together.
"""

import os

import faiss
import gradio as gr
import numpy as np
import PyPDF2
from groq import Groq
from sentence_transformers import SentenceTransformer

# -------- Step 1: Verify Groq API Key --------
# FIX(review): the original assigned a placeholder literal into os.environ,
# clobbering any real key the user had exported. Never hard-code secrets in
# source; read the environment and fail fast if the key is absent.
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    raise ValueError("❌ GROQ_API_KEY not found.")

client = Groq(api_key=api_key)

# -------- Step 2: Setup Model --------
model = SentenceTransformer("all-MiniLM-L6-v2")

# Module-level state shared between processing (writer) and Q&A (reader).
faiss_index = None   # faiss.IndexFlatL2 over chunk embeddings, or None until a PDF is processed
chunks_list = []     # chunk texts, positionally aligned with the index rows


# -------- Step 3: Extract Text from PDF --------
def extract_text_from_pdf(pdf_path, logs):
    """Read every page of the PDF at `pdf_path` and return (text, logs).

    Returns (None, logs) on failure; `logs` accumulates progress messages.
    """
    try:
        text = ""
        logs += "📖 Step 1: Reading PDF...\n"
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            for page in reader.pages:
                page_text = page.extract_text()
                # extract_text() may return None for image-only pages.
                if page_text:
                    text += page_text
        if not text.strip():
            raise ValueError("❌ No readable text found in PDF.")
        logs += "✅ Text extracted successfully.\n"
        return text, logs
    except Exception as e:
        logs += f"❌ Error during PDF text extraction: {str(e)}\n"
        return None, logs


# -------- Step 4: Chunking --------
def create_chunks(text, chunk_size, logs):
    """Split `text` into chunks of `chunk_size` words; return (chunks, logs).

    Returns (None, logs) on failure.
    """
    try:
        logs += "🧩 Step 2: Creating text chunks...\n"
        words = text.split()
        chunks = [' '.join(words[i:i + chunk_size])
                  for i in range(0, len(words), chunk_size)]
        logs += f"✅ {len(chunks)} chunks created.\n"
        return chunks, logs
    except Exception as e:
        logs += f"❌ Error in chunking: {str(e)}\n"
        return None, logs


# -------- Step 5: Embeddings & Indexing --------
def embed_chunks(chunks, logs):
    """Embed `chunks` and build a FAISS L2 index; return (index, chunks, logs).

    Returns (None, None, logs) on failure.
    """
    try:
        logs += "📊 Step 3: Generating embeddings and creating FAISS index...\n"
        embeddings = model.encode(chunks)
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(np.array(embeddings))
        logs += "✅ Embeddings & index created successfully.\n"
        return index, chunks, logs
    except Exception as e:
        logs += f"❌ Error in embedding/indexing: {str(e)}\n"
        return None, None, logs


# -------- Step 6: Process PDF --------
def process_pdf(file, chunk_size=200):
    """Run the full ingest pipeline on an uploaded PDF and return the log text.

    On success, publishes the index/chunks into the module-level
    `faiss_index` / `chunks_list` for `answer_question` to use.
    """
    global faiss_index, chunks_list
    logs = "📂 File uploaded successfully.\n"
    try:
        # FIX(review): gr.File(type="filepath") passes a plain str path, so the
        # original `file.name` raised AttributeError. Accept both a path string
        # and a file-like object for backward compatibility.
        pdf_path = file if isinstance(file, str) else file.name
        text, logs = extract_text_from_pdf(pdf_path, logs)
        if not text:
            return logs
        chunks, logs = create_chunks(text, chunk_size, logs)
        if not chunks:
            return logs
        index, chunks, logs = embed_chunks(chunks, logs)
        # FIX(review): SWIG-wrapped faiss objects are always truthy, so
        # `if not index:` only ever caught None by accident; say what we mean.
        if index is None:
            return logs
        faiss_index = index
        chunks_list = chunks
        logs += "🎉 Step 4: PDF processed and ready for Q&A.\n"
        return logs
    except Exception as e:
        logs += f"❌ Error during processing: {str(e)}\n"
        return logs


# -------- Step 7: Ask Question --------
def answer_question(query):
    """Answer `query` using the top-3 indexed chunks as context; return answer + logs."""
    logs = "🤖 Step 5: Processing your question...\n"
    try:
        if faiss_index is None:
            return "❌ Please process a PDF first."
        query_embedding = model.encode([query])
        _, I = faiss_index.search(np.array(query_embedding), k=3)
        # FIX(review): faiss pads missing neighbours with -1, and a negative
        # Python index would silently wrap around; guard the lower bound too.
        relevant_chunks = [chunks_list[i] for i in I[0]
                           if 0 <= i < len(chunks_list)]
        if not relevant_chunks:
            return "❌ No relevant content found to answer your question."
        context = "\n".join(relevant_chunks)
        prompt = f"Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {query}"
        logs += "🧠 Sending to Groq LLaMA3 model...\n"
        response = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[{"role": "user", "content": prompt}]
        )
        answer = response.choices[0].message.content
        logs += "✅ Answer generated successfully.\n"
        return f"{answer}\n\n{logs}"
    except Exception as e:
        logs += f"❌ Error during answering: {str(e)}\n"
        return logs


# -------- Step 8: Gradio UI --------
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 RAG PDF Q&A App with Groq + FAISS (Debug-Friendly)")
    with gr.Row():
        pdf_input = gr.File(label="📂 Upload PDF", type="filepath")
        process_btn = gr.Button("⚙️ Process PDF")
    log_output = gr.Textbox(label="📝 Logs", lines=20)
    with gr.Row():
        question_input = gr.Textbox(label="❓ Ask a Question")
        answer_btn = gr.Button("💬 Get Answer")
    answer_output = gr.Textbox(label="📜 Answer + Logs", lines=20)
    process_btn.click(fn=process_pdf, inputs=pdf_input, outputs=log_output)
    answer_btn.click(fn=answer_question, inputs=question_input, outputs=answer_output)

demo.launch()