# NOTE: The three lines that preceded this comment were Hugging Face web-page
# residue ("Adieee5's picture / Upload 8 files / e9ce2a7 verified"), not Python.
# They are preserved here as a comment so the module parses as valid Python.
import gradio as gr
import os
import uuid
import tempfile
from typing import List, Tuple, Optional
from config import Config
from pdf_processor import PDFProcessor
from vector_store import VectorStore
from rag_engine import RAGEngine
# Initialize components.
# These module-level singletons are shared by every Gradio event handler below;
# they are created once at import time.
pdf_processor = PDFProcessor(
    chunk_size=Config.CHUNK_SIZE,        # characters per chunk (see settings tab)
    chunk_overlap=Config.CHUNK_OVERLAP   # characters shared between adjacent chunks
)
vector_store = VectorStore(
    model_name=Config.EMBEDDING_MODEL,     # embedding model used for indexing/search
    vector_db_path=Config.VECTOR_DB_PATH   # on-disk path where the index is persisted
)
# The RAG engine answers questions by retrieving chunks from the vector store.
rag_engine = RAGEngine(vector_store)
def upload_and_process_pdfs(files: Optional[List]) -> str:
    """Process uploaded PDF files and add their text chunks to the vector store.

    Args:
        files: Gradio file objects (each exposes a ``.name`` path attribute),
            or None/empty when nothing was uploaded. The previous annotation
            used the private ``tempfile._TemporaryFileWrapper`` type, which is
            not a public API.

    Returns:
        A human-readable status string describing the processed files, any
        skipped non-PDF files, and the resulting database size. Errors are
        returned as text (never raised) so the Gradio Textbox always renders.
    """
    if not files:
        return "❌ No files uploaded."
    try:
        uploaded_files = []
        skipped_files = []
        total_chunks = 0
        for file in files:
            if file is None:
                continue
            file_path = file.name
            filename = os.path.basename(file_path)
            # Only PDFs are supported; remember anything else so the user is
            # told why a file produced no chunks (previously silently dropped).
            if not filename.lower().endswith('.pdf'):
                skipped_files.append(filename)
                continue
            # Extract text chunks and index them immediately.
            chunks = pdf_processor.extract_text_from_pdf(file_path)
            vector_store.add_documents(chunks)
            uploaded_files.append(filename)
            total_chunks += len(chunks)
        if uploaded_files:
            stats = vector_store.get_stats()
            status = (
                f"✅ Successfully processed {len(uploaded_files)} PDF(s):\n"
                f"📄 Files: {', '.join(uploaded_files)}\n"
                f"📊 Total chunks created: {total_chunks}\n"
                f"🗃️ Database now contains {stats['total_documents']} total documents"
            )
            if skipped_files:
                status += f"\n⚠️ Skipped non-PDF files: {', '.join(skipped_files)}"
            return status
        return "❌ No valid PDF files found."
    except Exception as e:
        # Surface the failure in the status box instead of crashing the UI.
        return f"❌ Error processing files: {str(e)}"
def get_database_stats() -> str:
    """Return a Markdown-formatted summary of the vector database.

    Reads document count, index size, and embedding dimension from the
    vector store's stats dict; 'dimension' may be absent, hence ``.get``.
    Original emoji were mojibake (UTF-8 shown as Thai/Latin-1); repaired here.
    """
    stats = vector_store.get_stats()
    return (
        "📊 **Database Statistics**\n\n"
        f"📄 Total Documents: {stats['total_documents']}\n"
        f"🔍 Index Size: {stats['index_size']}\n"
        f"📏 Vector Dimension: {stats.get('dimension', 'N/A')}"
    )
def clear_database() -> str:
    """Delete every document from the vector database.

    Returns:
        A status string (success or error) rather than raising, so the
        Gradio Textbox callback always receives displayable text.
    """
    try:
        vector_store.clear_index()
        return "✅ Database cleared successfully!"
    except Exception as e:
        return f"❌ Error clearing database: {str(e)}"
def respond(message: str, chat_history: List[dict]) -> Tuple[str, List[dict]]:
    """Handle one chat turn using Gradio's 'messages' history format.

    Args:
        message: The user's question; whitespace-only input is ignored.
        chat_history: List of ``{"role", "content"}`` dicts, mutated in place.

    Returns:
        ``("", updated_history)`` — the empty string clears the input textbox.

    The original duplicated the two history appends in both the success and
    error branches; here a single append pair handles both outcomes.
    """
    if not message.strip():
        return "", chat_history
    try:
        # Ask the RAG engine for an answer plus its supporting chunks.
        result = rag_engine.generate_answer(message, top_k=Config.TOP_K)
        response = result['answer']
        sources = result.get('sources', [])
        # Cite up to three sources so users can verify the answer.
        if sources:
            response += "\n\n**📚 Sources:**\n"
            for i, source in enumerate(sources[:3], 1):
                response += f"{i}. 📄 **{source['source_file']}** (Page {source['page_number']})\n"
                response += f"   📝 _{source['content_preview']}_\n"
    except Exception as e:
        # Surface failures inside the chat instead of crashing the UI.
        response = f"❌ Error: {str(e)}"
    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": response})
    return "", chat_history
def create_interface():
    """Build and return the Gradio Blocks UI.

    Three tabs: document upload/management, a RAG chat assistant, and a
    static system-information page. Wires button/submit events to the
    module-level handler functions defined above.

    NOTE(review): the emoji in these UI strings appear mojibake-encoded
    (UTF-8 decoded with the wrong codec); left byte-identical here —
    confirm intended glyphs before re-encoding.
    """
    with gr.Blocks(title="PDF RAG System") as interface:
        # Header
        gr.Markdown("# ๐Ÿค– PDF RAG Assistant")
        gr.Markdown("Upload PDFs and ask intelligent questions about their content using AI")
        with gr.Tabs():
            # Tab 1: Document Management — upload PDFs and maintain the vector DB
            with gr.Tab("๐Ÿ“ Document Management"):
                with gr.Row():
                    with gr.Column(scale=2):
                        gr.Markdown("## ๐Ÿ“ค Upload PDF Documents")
                        gr.Markdown("Drag and drop your PDF files or click to browse")
                        file_upload = gr.File(
                            file_count="multiple",
                            file_types=[".pdf"],
                            label="Select PDF files to upload"
                        )
                        upload_btn = gr.Button(
                            "๐Ÿš€ Process PDFs",
                            variant="primary",
                            size="lg"
                        )
                        upload_status = gr.Textbox(
                            label="๐Ÿ“Š Upload Status",
                            interactive=False,
                            max_lines=8
                        )
                    with gr.Column(scale=1):
                        gr.Markdown("## ๐Ÿ—„๏ธ Database Management")
                        # Seeded with the stats at build time; refreshed by the events below.
                        stats_display = gr.Markdown(get_database_stats())
                        with gr.Row():
                            refresh_btn = gr.Button("๐Ÿ”„ Refresh", size="sm", variant="secondary")
                            clear_btn = gr.Button("๐Ÿ—‘๏ธ Clear Database", size="sm", variant="stop")
                        clear_status = gr.Textbox(
                            label="๐Ÿ”ง Database Status",
                            interactive=False,
                            max_lines=3
                        )
                # Event handlers for document management
                def update_stats_display():
                    # Re-read stats so the Markdown panel reflects the current DB state.
                    return get_database_stats()
                # Process uploads, then chain a stats refresh (.then runs after the click handler).
                upload_btn.click(
                    fn=upload_and_process_pdfs,
                    inputs=[file_upload],
                    outputs=[upload_status]
                ).then(
                    fn=update_stats_display,
                    outputs=[stats_display]
                )
                refresh_btn.click(
                    fn=update_stats_display,
                    outputs=[stats_display]
                )
                # Clearing also chains a stats refresh so the panel shows the emptied DB.
                clear_btn.click(
                    fn=clear_database,
                    outputs=[clear_status]
                ).then(
                    fn=update_stats_display,
                    outputs=[stats_display]
                )
            # Tab 2: Chat Interface — RAG-powered Q&A over the uploaded documents
            with gr.Tab("๐Ÿ’ฌ AI Assistant"):
                gr.Markdown("## ๐Ÿค– Ask questions about your uploaded documents")
                gr.Markdown("**๐Ÿ’ก Tips:** Upload PDFs first, then ask specific questions about their content for detailed answers with source references.")
                # Create chat interface with messages format (list of role/content dicts),
                # pre-populated with a welcome message.
                chatbot = gr.Chatbot(
                    height=500,
                    show_label=False,
                    type="messages",
                    value=[{
                        "role": "assistant",
                        "content": "๐Ÿ‘‹ **Welcome to PDF RAG Assistant!**\n\nI'm here to help you analyze and understand your PDF documents. \n\n๐Ÿ“‹ **Getting started:**\n1. Upload PDFs in the 'Document Management' tab\n2. Come back here and ask me questions\n3. I'll provide detailed answers with source references\n\n๐Ÿš€ **Ready to get started?**"
                    }]
                )
                with gr.Row():
                    msg_input = gr.Textbox(
                        placeholder="๐Ÿ’ญ Ask a question about your documents...",
                        label="Your Question",
                        lines=2,
                        scale=4
                    )
                    send_btn = gr.Button(
                        "๐Ÿ“จ Send",
                        variant="primary",
                        size="lg",
                        scale=1
                    )
                clear_chat_btn = gr.Button(
                    "๐Ÿงน Clear Chat",
                    variant="secondary",
                    size="sm"
                )
                # Event handlers for chat — both the Send button and Enter submit the question.
                send_btn.click(
                    fn=respond,
                    inputs=[msg_input, chatbot],
                    outputs=[msg_input, chatbot]
                )
                msg_input.submit(
                    fn=respond,
                    inputs=[msg_input, chatbot],
                    outputs=[msg_input, chatbot]
                )
                # Reset the history to a fresh greeting message.
                clear_chat_btn.click(
                    fn=lambda: [{
                        "role": "assistant",
                        "content": "๐Ÿ‘‹ **Welcome back!**\n\nI'm ready to help you with your PDF documents again. What would you like to know?"
                    }],
                    outputs=[chatbot]
                )
            # Tab 3: System Information — static configuration and feature summary
            with gr.Tab("โ„น๏ธ System Information"):
                gr.Markdown("# โš™๏ธ System Configuration & Information")
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("## ๐Ÿ”ง Current Settings")
                        # Interpolates the live Config values at build time.
                        settings_info = f"""
**๐Ÿง  Embedding Model:** `{Config.EMBEDDING_MODEL}`
**๐Ÿ“ Chunk Size:** {Config.CHUNK_SIZE} characters
**๐Ÿ”— Chunk Overlap:** {Config.CHUNK_OVERLAP} characters
**๐ŸŽฏ Search Results:** Top {Config.TOP_K} most relevant chunks
**๐Ÿ“ Max File Size:** 16MB per PDF
"""
                        gr.Markdown(settings_info)
                    with gr.Column():
                        gr.Markdown("## ๐Ÿš€ Key Features")
                        features_info = """
โœ… Multiple PDF upload and processing
โœ… Intelligent text chunking
โœ… Vector similarity search using FAISS
โœ… AI-powered Q&A with Google Gemini
โœ… Source attribution with page numbers
โœ… Persistent vector database storage
โœ… Real-time chat interface
โœ… Responsive modern UI
"""
                        gr.Markdown(features_info)
                gr.Markdown("## ๐Ÿ› ๏ธ Technology Stack")
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("**๐Ÿ–ฅ๏ธ Framework:** Gradio 4.44+")
                        gr.Markdown("**๐Ÿ“„ PDF Processing:** PyMuPDF")
                    with gr.Column():
                        gr.Markdown("**๐Ÿงฎ Embeddings:** Sentence Transformers")
                        gr.Markdown("**๐Ÿ—ƒ๏ธ Vector Database:** FAISS")
                    with gr.Column():
                        gr.Markdown("**๐Ÿค– Language Model:** Google Gemini 1.5")
                gr.Markdown("## ๐Ÿ“ Quick Start Guide")
                guide_info = """
**1.** Upload Documents - Go to 'Document Management' tab and upload your PDF files
**2.** Process & Index - Wait for the system to extract text and create embeddings
**3.** Ask Questions - Switch to 'AI Assistant' tab and start asking questions
**4.** Get Intelligent Answers - Receive detailed responses with source references and page numbers
"""
                gr.Markdown(guide_info)
    return interface
if __name__ == "__main__":
    # Build the UI once, then serve it on all interfaces at port 7860.
    app = create_interface()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
    )