Spaces:
Runtime error
Runtime error
File size: 4,863 Bytes
7646bd1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import os
import gradio as gr
import PyPDF2
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from groq import Groq
# -------- Step 1: Load and Verify Groq API Key --------
# The key is read from the environment (for example a deployment secret) and
# is never assigned here: writing a placeholder into os.environ would clobber
# a real key supplied by the host and make the check below unreachable.
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    raise ValueError("β GROQ_API_KEY not found.")
client = Groq(api_key=api_key)

# -------- Step 2: Setup Model --------
# Sentence-embedding model shared by indexing and by question answering.
model = SentenceTransformer("all-MiniLM-L6-v2")
# Search state; both names are rebound by process_pdf() after a successful run.
faiss_index = None
chunks_list = []
# -------- Step 3: Extract Text from PDF --------
def extract_text_from_pdf(pdf_path, logs):
    """Read every page of a PDF and return its text together with the log.

    Args:
        pdf_path: Path of the PDF file to open.
        logs: Log text accumulated so far; progress lines are appended to it.

    Returns:
        A ``(text, logs)`` tuple. ``text`` is ``None`` when the file cannot be
        read or contains no extractable text; the reason is appended to
        ``logs``. This function does not raise.
    """
    try:
        logs += "π Step 1: Reading PDF...\n"
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # Extract while the file is still open.
            page_texts = [page.extract_text() for page in reader.pages]
        # Join pages with a newline so the last word of one page does not fuse
        # with the first word of the next; pages without text are skipped.
        text = "\n".join(page_text for page_text in page_texts if page_text)
        if not text.strip():
            raise ValueError("β No readable text found in PDF.")
        logs += "✅ Text extracted successfully.\n"
        return text, logs
    except Exception as e:
        # Failures are reported through the log instead of being raised, so
        # the caller can show them to the user.
        logs += f"β Error during PDF text extraction: {e}\n"
        return None, logs
# -------- Step 4: Chunking --------
def create_chunks(text, chunk_size, logs):
    """Split text into consecutive chunks of at most ``chunk_size`` words.

    Args:
        text: The text to split; words are separated on whitespace.
        chunk_size: Maximum number of words per chunk. Must be positive after
            conversion with ``int()``.
        logs: Log text accumulated so far; progress lines are appended to it.

    Returns:
        A ``(chunks, logs)`` tuple. ``chunks`` is a list of strings (empty when
        ``text`` has no words) or ``None`` when chunking failed; the reason is
        appended to ``logs``. This function does not raise.
    """
    try:
        logs += "π§© Step 2: Creating text chunks...\n"
        # range() needs an int step, so integral floats/strings are coerced.
        size = int(chunk_size)
        # A negative step would silently produce zero chunks; reject it here.
        if size <= 0:
            raise ValueError("chunk_size must be a positive integer")
        words = text.split()
        chunks = [' '.join(words[start:start + size]) for start in range(0, len(words), size)]
        logs += f"✅ {len(chunks)} chunks created.\n"
        return chunks, logs
    except Exception as e:
        # Failures are reported through the log instead of being raised.
        logs += f"β Error in chunking: {e}\n"
        return None, logs
# -------- Step 5: Embeddings & Indexing --------
def embed_chunks(chunks, logs):
    """Embed the chunks with the module-level model and index them in FAISS.

    Args:
        chunks: List of text chunks to embed; must not be empty.
        logs: Log text accumulated so far; progress lines are appended to it.

    Returns:
        An ``(index, chunks, logs)`` tuple. On failure ``index`` and ``chunks``
        are both ``None`` and the reason is appended to ``logs``. This function
        does not raise.
    """
    try:
        logs += "π Step 3: Generating embeddings and creating FAISS index...\n"
        # Without this guard an empty list only fails later, on the shape
        # lookup, with an unhelpful IndexError message.
        if not chunks:
            raise ValueError("no chunks to embed")
        # FAISS expects a C-contiguous float32 matrix, so make that explicit.
        embeddings = np.ascontiguousarray(model.encode(chunks), dtype=np.float32)
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)
        logs += "✅ Embeddings & index created successfully.\n"
        return index, chunks, logs
    except Exception as e:
        # Failures are reported through the log instead of being raised.
        logs += f"β Error in embedding/indexing: {e}\n"
        return None, None, logs
# -------- Step 6: Process PDF --------
def process_pdf(file, chunk_size=200):
    """Run extraction, chunking and indexing for an uploaded PDF.

    On success the module-level ``faiss_index`` and ``chunks_list`` are
    replaced with the new index and its chunks; on any failure they are left
    untouched.

    Args:
        file: The upload, either a filesystem path (``str`` or path-like) or an
            object exposing the path as ``.name``.
        chunk_size: Maximum number of words per chunk.

    Returns:
        The accumulated log text describing each step or the failure. This
        function does not raise.
    """
    global faiss_index, chunks_list
    logs = ""
    try:
        # Reject a missing upload before claiming that it succeeded.
        if file is None:
            raise ValueError("no file was provided")
        logs += "π File uploaded successfully.\n"

        # The UI's File component uses type="filepath", which delivers a plain
        # path string; objects carrying the path in .name are still accepted.
        pdf_path = file if isinstance(file, (str, os.PathLike)) else file.name

        text, logs = extract_text_from_pdf(pdf_path, logs)
        if not text:
            return logs

        chunks, logs = create_chunks(text, chunk_size, logs)
        if not chunks:
            return logs

        index, chunks, logs = embed_chunks(chunks, logs)
        # Compare with None explicitly rather than relying on the truthiness
        # of the index object.
        if index is None:
            return logs

        # Publish the new state only after every step has succeeded.
        faiss_index = index
        chunks_list = chunks
        logs += "π Step 4: PDF processed and ready for Q&A.\n"
        return logs
    except Exception as e:
        # Failures are reported through the log instead of being raised.
        logs += f"β Error during processing: {e}\n"
        return logs
# -------- Step 7: Ask Question --------
def answer_question(query):
    """Answer a question from the indexed PDF using the Groq chat model.

    The question is embedded, the three nearest chunks are looked up in the
    module-level FAISS index, and those chunks are sent as context to the
    model together with the question.

    Args:
        query: The user's question.

    Returns:
        The answer followed by the step log on success; otherwise a message
        explaining why no answer was produced. This function does not raise.
    """
    logs = "π€ Step 5: Processing your question...\n"
    try:
        # A blank question cannot be answered; do not embed it or call the API.
        if not query or not query.strip():
            raise ValueError("question is empty")
        if faiss_index is None:
            return "β Please process a PDF first."

        # FAISS expects a C-contiguous float32 matrix, so make that explicit.
        query_embedding = np.ascontiguousarray(model.encode([query]), dtype=np.float32)
        _, neighbours = faiss_index.search(query_embedding, k=3)
        # FAISS pads with -1 when the index holds fewer than k vectors. The
        # lower bound keeps -1 from being used as a list index, which would
        # wrongly select the last chunk.
        relevant_chunks = [
            chunks_list[i] for i in neighbours[0] if 0 <= i < len(chunks_list)
        ]
        if not relevant_chunks:
            return "β No relevant content found to answer your question."

        context = "\n".join(relevant_chunks)
        prompt = f"Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {query}"
        logs += "π§ Sending to Groq LLaMA3 model...\n"
        response = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[{"role": "user", "content": prompt}],
        )
        answer = response.choices[0].message.content
        logs += "✅ Answer generated successfully.\n"
        return f"{answer}\n\n{logs}"
    except Exception as e:
        # Failures are reported through the returned text instead of being
        # raised.
        logs += f"β Error during answering: {e}\n"
        return logs
# -------- Step 8: Gradio UI --------
# NOTE(review): the nesting below is reconstructed from a source whose
# indentation was lost. Each textbox is placed under its row rather than
# inside it; confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## π€ RAG PDF Q&A App with Groq + FAISS (Debug-Friendly)")

    # Upload controls and the processing log.
    with gr.Row():
        pdf_input = gr.File(label="π Upload PDF", type="filepath")
        process_btn = gr.Button("βοΈ Process PDF")
    log_output = gr.Textbox(label="π Logs", lines=20)

    # Question controls and the answer display.
    with gr.Row():
        question_input = gr.Textbox(label="β Ask a Question")
        answer_btn = gr.Button("π¬ Get Answer")
    answer_output = gr.Textbox(label="π Answer + Logs", lines=20)

    # Event wiring: handler, then its input component, then its output.
    process_btn.click(process_pdf, pdf_input, log_output)
    answer_btn.click(answer_question, question_input, answer_output)

demo.launch()
|