File size: 4,863 Bytes
7646bd1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import os
import gradio as gr
import PyPDF2
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from groq import Groq

# -------- Step 1: Set and Verify Groq API Key --------
# Read the key from the environment instead of hard-coding it in source:
# committing a real key is a credential leak, and assigning a placeholder
# immediately before os.getenv() made the "missing key" check dead code
# (it could never fail, and the client was built with a bogus key).
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    raise ValueError("❌ GROQ_API_KEY not found.")

client = Groq(api_key=api_key)

# -------- Step 2: Setup Model --------
# Sentence-embedding model shared by chunk indexing and query encoding.
model = SentenceTransformer("all-MiniLM-L6-v2")
# Module-level state written by process_pdf() and read by answer_question().
faiss_index = None  # faiss.IndexFlatL2 over chunk embeddings, or None until a PDF is processed
chunks_list = []    # text chunks aligned row-for-row with the FAISS index

# -------- Step 3: Extract Text from PDF --------
def extract_text_from_pdf(pdf_path, logs):
    """Read every page of the PDF at *pdf_path* and return (text, logs).

    Pages that yield no extractable text are skipped. On any failure the
    error is appended to *logs* and (None, logs) is returned instead.
    """
    try:
        logs += "πŸ“– Step 1: Reading PDF...\n"
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            page_texts = [page.extract_text() for page in reader.pages]
        # Drop pages where extract_text() returned None/empty before joining.
        text = "".join(chunk for chunk in page_texts if chunk)
        if not text.strip():
            raise ValueError("❌ No readable text found in PDF.")
        logs += "βœ… Text extracted successfully.\n"
        return text, logs
    except Exception as e:
        logs += f"❌ Error during PDF text extraction: {str(e)}\n"
        return None, logs

# -------- Step 4: Chunking --------
def create_chunks(text, chunk_size, logs):
    """Split *text* into whitespace-tokenized chunks of at most *chunk_size* words.

    Returns (chunks, logs); on error appends the message to *logs* and
    returns (None, logs).
    """
    try:
        logs += "🧩 Step 2: Creating text chunks...\n"
        words = text.split()
        chunks = []
        for start in range(0, len(words), chunk_size):
            chunks.append(' '.join(words[start:start + chunk_size]))
        logs += f"βœ… {len(chunks)} chunks created.\n"
        return chunks, logs
    except Exception as e:
        logs += f"❌ Error in chunking: {str(e)}\n"
        return None, logs

# -------- Step 5: Embeddings & Indexing --------
def embed_chunks(chunks, logs):
    """Encode *chunks* with the global model and build a FAISS L2 index over them.

    Returns (index, chunks, logs); on error returns (None, None, logs)
    with the failure appended to *logs*.
    """
    try:
        logs += "πŸ“Š Step 3: Generating embeddings and creating FAISS index...\n"
        vectors = model.encode(chunks)
        dim = vectors.shape[1]
        index = faiss.IndexFlatL2(dim)
        index.add(np.array(vectors))
        logs += "βœ… Embeddings & index created successfully.\n"
        return index, chunks, logs
    except Exception as e:
        logs += f"❌ Error in embedding/indexing: {str(e)}\n"
        return None, None, logs

# -------- Step 6: Process PDF --------
def process_pdf(file, chunk_size=200):
    """Run the ingestion pipeline: extract text -> chunk -> embed/index.

    *file* may be a filepath string (what gr.File(type="filepath") passes)
    or an object exposing the temp path via .name (legacy Gradio uploads).
    On success populates the module-level faiss_index / chunks_list globals.
    Always returns the accumulated log text for display in the UI.
    """
    global faiss_index, chunks_list
    logs = "πŸ“‚ File uploaded successfully.\n"
    try:
        # The UI declares gr.File(type="filepath"), which hands this
        # function a plain path string — calling .name on it would raise
        # AttributeError. Accept both shapes.
        pdf_path = file if isinstance(file, str) else file.name

        text, logs = extract_text_from_pdf(pdf_path, logs)
        if not text:
            return logs

        chunks, logs = create_chunks(text, chunk_size, logs)
        if not chunks:
            return logs

        index, chunks, logs = embed_chunks(chunks, logs)
        # Truthiness of a FAISS index object is not a documented contract;
        # test explicitly for the None that embed_chunks returns on failure.
        if index is None:
            return logs

        faiss_index = index
        chunks_list = chunks
        logs += "πŸŽ‰ Step 4: PDF processed and ready for Q&A.\n"
        return logs
    except Exception as e:
        logs += f"❌ Error during processing: {str(e)}\n"
        return logs

# -------- Step 7: Ask Question --------
def answer_question(query):
    """Answer *query* via RAG: retrieve top chunks from FAISS, ask the LLM.

    Returns the model's answer followed by the log text, or a standalone
    error/log string when retrieval or generation fails.
    """
    logs = "πŸ€– Step 5: Processing your question...\n"
    try:
        if faiss_index is None:
            return "❌ Please process a PDF first."

        query_embedding = model.encode([query])
        _, I = faiss_index.search(np.array(query_embedding), k=3)
        # FAISS pads results with -1 when fewer than k vectors are indexed.
        # The previous guard `i < len(chunks_list)` let -1 through, so
        # chunks_list[-1] silently duplicated the last chunk. Require a
        # valid non-negative row index.
        relevant_chunks = [chunks_list[i] for i in I[0] if 0 <= i < len(chunks_list)]

        if not relevant_chunks:
            return "❌ No relevant content found to answer your question."

        context = "\n".join(relevant_chunks)
        prompt = f"Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {query}"

        logs += "🧠 Sending to Groq LLaMA3 model...\n"
        response = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[{"role": "user", "content": prompt}]
        )

        answer = response.choices[0].message.content
        logs += "βœ… Answer generated successfully.\n"
        return f"{answer}\n\n{logs}"

    except Exception as e:
        logs += f"❌ Error during answering: {str(e)}\n"
        return logs

# -------- Step 8: Gradio UI --------
# Two-row layout: row 1 uploads a PDF, runs the ingestion pipeline, and
# shows its logs; row 2 asks a question against the indexed chunks.
with gr.Blocks() as demo:
    gr.Markdown("## πŸ€– RAG PDF Q&A App with Groq + FAISS (Debug-Friendly)")

    with gr.Row():
        # type="filepath" makes Gradio pass a path string to process_pdf.
        pdf_input = gr.File(label="πŸ“‚ Upload PDF", type="filepath")
        process_btn = gr.Button("βš™οΈ Process PDF")
        log_output = gr.Textbox(label="πŸ“ Logs", lines=20)

    with gr.Row():
        question_input = gr.Textbox(label="❓ Ask a Question")
        answer_btn = gr.Button("πŸ’¬ Get Answer")
        answer_output = gr.Textbox(label="πŸ“œ Answer + Logs", lines=20)

    # Wire buttons to the pipeline functions; each returns the text shown
    # in its respective output textbox.
    process_btn.click(fn=process_pdf, inputs=pdf_input, outputs=log_output)
    answer_btn.click(fn=answer_question, inputs=question_input, outputs=answer_output)

demo.launch()