import streamlit as st
import os
from groq import Groq
import tempfile
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import gdown
import re
# -------------------- CONFIG -------------------- #

# Read the Groq API key from the environment; never hard-code the key itself
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
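# Note: GROQ_API_KEY is the conventional variable name (it is also what the
# Groq client falls back to when no api_key is passed); on Hugging Face Spaces
# it can be configured as a repository secret.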
# Predefined list of Google Drive PDF links (public/viewable)
PDF_LINKS = [
    "https://drive.google.com/file/d/14ZxfkSKBbvHINQnE9gWY8fwlaV9KBD4Y/view?usp=sharing",
    "https://drive.google.com/file/d/1LONqUI8Us6WDbWi35ueM5qin9bIhI0Bz/view?usp=sharing",
    "https://drive.google.com/file/d/1Robrjv3n9ckEcXUJj7zQw5o7YZ4C52tf/view?usp=sharing",
    "https://drive.google.com/file/d/113_0ixx4LyjCtfZHZYePbYMRs8mdfieG/view?usp=sharing"
]
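# Note: gdown can only fetch files shared as "Anyone with the link";
# private files will fail to download and trigger the warning shown later.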
# -------------------- UTILITY FUNCTIONS -------------------- #

# Extract the file ID from a Google Drive share URL
def extract_gdrive_id(url):
    match = re.search(r'/d/([a-zA-Z0-9_-]+)', url)
    return match.group(1) if match else None
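# Example (hypothetical ID, for illustration):
#   extract_gdrive_id("https://drive.google.com/file/d/abc123/view?usp=sharing")
#   -> "abc123"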
# Download file from Google Drive to a temporary path
def download_from_gdrive(gdrive_url):
    file_id = extract_gdrive_id(gdrive_url)
    if not file_id:
        return None
    download_url = f"https://drive.google.com/uc?id={file_id}"
    output = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf").name
    # gdown.download returns the output path on success and None on failure,
    # so a failed download propagates to the caller's `if path:` check
    return gdown.download(download_url, output, quiet=True)
# Extract raw text from a PDF
def extract_text(file_path):
    reader = PdfReader(file_path)
    text = ""
    for page in reader.pages:
        # extract_text() may return None for image-only pages
        text += page.extract_text() or ""
    return text
# Chunk text into small pieces
def chunk_text(text, chunk_size=300):
    words = text.split()
    return [' '.join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]
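# Example: with chunk_size=3, "a b c d e f g" -> ["a b c", "d e f", "g"].
# Chunks here are fixed-size and non-overlapping; an overlapping window
# (a common RAG refinement, not used in this app) would instead step by
# chunk_size minus the overlap so context isn't cut at chunk boundaries.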
# Create FAISS index
def create_faiss_index(chunks):
    embeddings = embed_model.encode(chunks)
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(np.array(embeddings))
    return index, embeddings
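# IndexFlatL2 is an exact (brute-force) L2-distance index, which is fine for
# the small number of chunks a handful of PDFs produces; for much larger
# corpora an approximate index such as faiss.IndexIVFFlat would be the usual
# swap-in.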
# Search top-k chunks
def search_chunks(query, chunks, index):
    query_embedding = embed_model.encode([query])
    # Cap k so FAISS never asks for more neighbors than there are chunks
    D, I = index.search(np.array(query_embedding), k=min(3, len(chunks)))
    return [chunks[i] for i in I[0]]
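# Example (hypothetical query): search_chunks("flood evacuation steps", chunks, index)
# returns the chunks whose embeddings lie closest (by L2 distance) to the
# query embedding.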
# -------------------- MAIN APP -------------------- #

# Load embedding model
embed_model = SentenceTransformer('all-MiniLM-L6-v2')
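# Note: Streamlit reruns this script on every interaction, so the model is
# reloaded each time. A cached variant (sketch, using Streamlit's
# st.cache_resource; load_embed_model is an illustrative helper name):
#
#     @st.cache_resource
#     def load_embed_model():
#         return SentenceTransformer('all-MiniLM-L6-v2')
#     embed_model = load_embed_model()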
st.title("ReliefBot – Auto-Fetched Disaster PDFs with QA")
if "initialized" not in st.session_state: | |
st.session_state.initialized = False | |
if not st.session_state.initialized: | |
all_chunks = [] | |
with st.spinner("π₯ Downloading & processing predefined documents..."): | |
for url in PDF_LINKS: | |
path = download_from_gdrive(url.strip()) | |
if path: | |
raw_text = extract_text(path) | |
chunks = chunk_text(raw_text) | |
all_chunks.extend(chunks) | |
else: | |
st.warning(f"β οΈ Failed to download: {url}") | |
if all_chunks: | |
index, _ = create_faiss_index(all_chunks) | |
st.session_state.chunks = all_chunks | |
st.session_state.index = index | |
st.session_state.initialized = True | |
st.success("β Documents downloaded and indexed successfully!") | |
# Question interface
if st.session_state.initialized:
    query = st.text_input("Ask your question about disaster safety or documents:")
    if query:
        relevant_chunks = search_chunks(query, st.session_state.chunks, st.session_state.index)
        context = "\n".join(relevant_chunks)
        prompt = f"Answer based on this context:\n{context}\n\nQuestion: {query}"
        with st.spinner("Generating answer..."):
            response = client.chat.completions.create(
                messages=[{"role": "user", "content": prompt}],
                model="llama-3.1-8b-instant"
            )
        st.markdown("### ReliefBot Answer:")
        st.write(response.choices[0].message.content)