Spaces: Runtime error
import os
import gradio as gr
import PyPDF2
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from groq import Groq
# -------- Step 1: Set and Verify Groq API Key --------
os.environ["GROQ_API_KEY"] = "your-real-groq-api-key"  # Replace this
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    raise ValueError("❌ GROQ_API_KEY not found.")
client = Groq(api_key=api_key)
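# Note: the hardcoded assignment above is only a placeholder. On Hugging Face Spaces
# the usual approach is to store GROQ_API_KEY as a repository secret, which is then
# exposed to the app as an environment variable, so the os.environ line can be dropped.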
# -------- Step 2: Setup Model --------
model = SentenceTransformer("all-MiniLM-L6-v2")
faiss_index = None
chunks_list = []
# -------- Step 3: Extract Text from PDF --------
def extract_text_from_pdf(pdf_path, logs):
    try:
        text = ""
        logs += "📖 Step 1: Reading PDF...\n"
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            for page in reader.pages:
                page_text = page.extract_text()
                if page_text:
                    text += page_text
        if not text.strip():
            raise ValueError("❌ No readable text found in PDF.")
        logs += "✅ Text extracted successfully.\n"
        return text, logs
    except Exception as e:
        logs += f"❌ Error during PDF text extraction: {str(e)}\n"
        return None, logs
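# PyPDF2's extract_text() returns an empty string for pages with no extractable text
# layer (e.g. scanned/image-only PDFs), which is what the "no readable text" check
# above guards against.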
# -------- Step 4: Chunking --------
def create_chunks(text, chunk_size, logs):
    try:
        logs += "🧩 Step 2: Creating text chunks...\n"
        words = text.split()
        chunks = [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
        logs += f"✅ {len(chunks)} chunks created.\n"
        return chunks, logs
    except Exception as e:
        logs += f"❌ Error in chunking: {str(e)}\n"
        return None, logs
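# Example: with the default chunk_size of 200 words, a 500-word document yields
# 3 chunks of 200, 200, and 100 words.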
# -------- Step 5: Embeddings & Indexing --------
def embed_chunks(chunks, logs):
    try:
        logs += "🔍 Step 3: Generating embeddings and creating FAISS index...\n"
        embeddings = model.encode(chunks)
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(np.array(embeddings))
        logs += "✅ Embeddings & index created successfully.\n"
        return index, chunks, logs
    except Exception as e:
        logs += f"❌ Error in embedding/indexing: {str(e)}\n"
        return None, None, logs
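# all-MiniLM-L6-v2 produces 384-dimensional embeddings, so IndexFlatL2 builds an
# exact (brute-force) L2 index over them; no training step is needed and search is
# exact, which is fine at the scale of a single PDF.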
# -------- Step 6: Process PDF --------
def process_pdf(file, chunk_size=200):
    global faiss_index, chunks_list
    logs = "📁 File uploaded successfully.\n"
    try:
        # gr.File(type="filepath") passes a plain string path, not a file object
        pdf_path = file if isinstance(file, str) else file.name
        text, logs = extract_text_from_pdf(pdf_path, logs)
        if not text:
            return logs
        chunks, logs = create_chunks(text, chunk_size, logs)
        if not chunks:
            return logs
        index, chunks, logs = embed_chunks(chunks, logs)
        if index is None:
            return logs
        faiss_index = index
        chunks_list = chunks
        logs += "🎉 Step 4: PDF processed and ready for Q&A.\n"
        return logs
    except Exception as e:
        logs += f"❌ Error during processing: {str(e)}\n"
        return logs
# -------- Step 7: Ask Question --------
def answer_question(query):
    logs = "🤔 Step 5: Processing your question...\n"
    try:
        if faiss_index is None:
            return "❌ Please process a PDF first."
        query_embedding = model.encode([query])
        _, I = faiss_index.search(np.array(query_embedding), k=3)
        relevant_chunks = [chunks_list[i] for i in I[0] if 0 <= i < len(chunks_list)]
        if not relevant_chunks:
            return "❌ No relevant content found to answer your question."
        context = "\n".join(relevant_chunks)
        prompt = f"Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {query}"
        logs += "🧠 Sending to Groq LLaMA3 model...\n"
        response = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[{"role": "user", "content": prompt}]
        )
        answer = response.choices[0].message.content
        logs += "✅ Answer generated successfully.\n"
        return f"{answer}\n\n{logs}"
    except Exception as e:
        logs += f"❌ Error during answering: {str(e)}\n"
        return logs
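# FAISS returns -1 as the neighbour index when the index holds fewer than k vectors,
# which is why the retrieval filter above checks 0 <= i before using the position.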
# -------- Step 8: Gradio UI --------
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 RAG PDF Q&A App with Groq + FAISS (Debug-Friendly)")
    with gr.Row():
        pdf_input = gr.File(label="📄 Upload PDF", type="filepath")
        process_btn = gr.Button("⚙️ Process PDF")
    log_output = gr.Textbox(label="📋 Logs", lines=20)
    with gr.Row():
        question_input = gr.Textbox(label="❓ Ask a Question")
        answer_btn = gr.Button("💬 Get Answer")
    answer_output = gr.Textbox(label="📝 Answer + Logs", lines=20)
    process_btn.click(fn=process_pdf, inputs=pdf_input, outputs=log_output)
    answer_btn.click(fn=answer_question, inputs=question_input, outputs=answer_output)

demo.launch()
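For reference, a minimal requirements.txt sketch for running this Space, assuming CPU-only FAISS and unpinned versions (adjust as needed):

gradio
PyPDF2
faiss-cpu
sentence-transformers
numpy
groq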