Spaces:
Sleeping
Sleeping
import gradio as gr | |
import os | |
import uuid | |
import tempfile | |
from typing import List, Tuple, Optional | |
from config import Config | |
from pdf_processor import PDFProcessor | |
from vector_store import VectorStore | |
from rag_engine import RAGEngine | |
# Shared, module-level service objects used by every UI callback below.
# All tunables are read from Config.
pdf_processor = PDFProcessor(chunk_size=Config.CHUNK_SIZE, chunk_overlap=Config.CHUNK_OVERLAP)
vector_store = VectorStore(model_name=Config.EMBEDDING_MODEL, vector_db_path=Config.VECTOR_DB_PATH)
# The RAG engine is handed the vector store instance created above.
rag_engine = RAGEngine(vector_store)
def _resolve_upload_path(file: object) -> str:
    """Return the filesystem path for a single uploaded item.

    Accepts either a path (``str`` / ``os.PathLike``) or any object that
    exposes its path through a ``name`` attribute (e.g. a temporary-file
    wrapper).

    Raises:
        AttributeError: if the item is neither a path nor has ``name``.
    """
    if isinstance(file, (str, os.PathLike)):
        # Use the path itself; for pathlib objects ``.name`` would only be
        # the basename and the directory would be lost.
        return os.fspath(file)
    return file.name


def upload_and_process_pdfs(files: Optional[List[object]]) -> str:
    """Process uploaded PDF files and add them to the vector store.

    Args:
        files: Uploaded items. Each item may be ``None`` (skipped), a path
            string / ``os.PathLike``, or an object whose ``name`` attribute
            holds the path. Items whose basename does not end in ``.pdf``
            (case-insensitive) are skipped.

    Returns:
        A human-readable status message: a success summary, a notice that
        nothing usable was uploaded, or an error description. Exceptions are
        never propagated; they are reported in the returned text.
    """
    if not files:
        return "โ No files uploaded."

    try:
        uploaded_files = []
        total_chunks = 0

        for file in files:
            if file is None:
                continue

            file_path = _resolve_upload_path(file)
            filename = os.path.basename(file_path)

            # Only PDFs are indexed; anything else is silently ignored.
            if not filename.lower().endswith('.pdf'):
                continue

            # Extract the chunks and index them before counting the file as
            # processed, so a failure never reports a file that was not added.
            chunks = pdf_processor.extract_text_from_pdf(file_path)
            vector_store.add_documents(chunks)

            uploaded_files.append(filename)
            total_chunks += len(chunks)

        if not uploaded_files:
            return "โ No valid PDF files found."

        stats = vector_store.get_stats()
        return (
            f"โ Successfully processed {len(uploaded_files)} PDF(s):\n"
            f"๐ Files: {', '.join(uploaded_files)}\n"
            f"๐ Total chunks created: {total_chunks}\n"
            f"๐๏ธ Database now contains {stats['total_documents']} total documents"
        )
    except Exception as e:
        # UI boundary: surface the failure as status text instead of raising.
        return f"โ Error processing files: {str(e)}"
def get_database_stats() -> str:
    """Return the vector store's statistics as Markdown-formatted text.

    Reads ``total_documents`` and ``index_size`` (both required) and
    ``dimension`` (optional, shown as ``N/A`` when absent) from
    ``vector_store.get_stats()``.
    """
    info = vector_store.get_stats()
    report_lines = [
        "๐ **Database Statistics**",
        "",  # blank line separating the heading from the figures
        f"๐ Total Documents: {info['total_documents']}",
        f"๐ Index Size: {info['index_size']}",
        f"๐ Vector Dimension: {info.get('dimension', 'N/A')}",
    ]
    return "\n".join(report_lines)
def clear_database() -> str:
    """Clear the vector store index and report the outcome as status text.

    Any exception raised by ``vector_store.clear_index()`` is turned into an
    error message rather than propagated.
    """
    try:
        vector_store.clear_index()
    except Exception as exc:
        return f"โ Error clearing database: {exc!s}"
    else:
        return "โ Database cleared successfully!"
def respond(message: Optional[str], chat_history: Optional[List[dict]]) -> Tuple[str, List[dict]]:
    """Answer one chat message and return the updated conversation.

    Args:
        message: The user's question. ``None``, empty, or whitespace-only
            input is ignored.
        chat_history: Prior conversation as a list of
            ``{"role": ..., "content": ...}`` dicts. ``None`` is treated as
            an empty history. The list passed in is not modified.

    Returns:
        A tuple ``("", history)``: the empty string is the new value for the
        input box, and ``history`` is a new list containing the previous
        entries plus the user message and the assistant reply. If answering
        fails, the assistant reply carries the error text instead; no
        exception is propagated.
    """
    # Work on a copy so the caller's list is never mutated, and so a missing
    # history cannot break the error path below.
    history = list(chat_history) if chat_history else []

    if not message or not message.strip():
        return "", history

    try:
        result = rag_engine.generate_answer(message, top_k=Config.TOP_K)
        reply = result['answer']
        sources = result.get('sources', [])

        if sources:
            # Append at most three source references below the answer.
            parts = [reply, "\n\n**๐ Sources:**\n"]
            for i, source in enumerate(sources[:3], 1):
                parts.append(f"{i}. ๐ **{source['source_file']}** (Page {source['page_number']})\n")
                parts.append(f" ๐ _{source['content_preview']}_\n")
            reply = "".join(parts)
    except Exception as e:
        # UI boundary: show the failure in the chat instead of raising.
        reply = f"โ Error: {str(e)}"

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": reply})
    return "", history
def _build_document_tab() -> None:
    """Lay out the document-management tab and wire its three buttons.

    Must be called inside an active ``gr.Tabs()`` context.
    """
    with gr.Tab("๐ Document Management"):
        with gr.Row():
            # Left column: upload widgets.
            with gr.Column(scale=2):
                gr.Markdown("## ๐ค Upload PDF Documents")
                gr.Markdown("Drag and drop your PDF files or click to browse")
                file_upload = gr.File(
                    file_count="multiple",
                    file_types=[".pdf"],
                    label="Select PDF files to upload",
                )
                upload_btn = gr.Button("๐ Process PDFs", variant="primary", size="lg")
                upload_status = gr.Textbox(label="๐ Upload Status", interactive=False, max_lines=8)

            # Right column: database statistics and maintenance.
            with gr.Column(scale=1):
                gr.Markdown("## ๐๏ธ Database Management")
                stats_display = gr.Markdown(get_database_stats())
                with gr.Row():
                    refresh_btn = gr.Button("๐ Refresh", size="sm", variant="secondary")
                    clear_btn = gr.Button("๐๏ธ Clear Database", size="sm", variant="stop")
                clear_status = gr.Textbox(label="๐ง Database Status", interactive=False, max_lines=3)

        def update_stats_display():
            return get_database_stats()

        # Uploading and clearing both refresh the statistics afterwards.
        upload_btn.click(
            fn=upload_and_process_pdfs,
            inputs=[file_upload],
            outputs=[upload_status],
        ).then(fn=update_stats_display, outputs=[stats_display])

        refresh_btn.click(fn=update_stats_display, outputs=[stats_display])

        clear_btn.click(
            fn=clear_database,
            outputs=[clear_status],
        ).then(fn=update_stats_display, outputs=[stats_display])


def _build_chat_tab() -> None:
    """Lay out the chat tab and wire send / submit / clear.

    Must be called inside an active ``gr.Tabs()`` context.
    """
    welcome_text = "๐ **Welcome to PDF RAG Assistant!**\n\nI'm here to help you analyze and understand your PDF documents. \n\n๐ **Getting started:**\n1. Upload PDFs in the 'Document Management' tab\n2. Come back here and ask me questions\n3. I'll provide detailed answers with source references\n\n๐ **Ready to get started?**"

    with gr.Tab("๐ฌ AI Assistant"):
        gr.Markdown("## ๐ค Ask questions about your uploaded documents")
        gr.Markdown("**๐ก Tips:** Upload PDFs first, then ask specific questions about their content for detailed answers with source references.")

        # The chatbot uses the role/content "messages" format and starts
        # with a single assistant greeting.
        chatbot = gr.Chatbot(
            height=500,
            show_label=False,
            type="messages",
            value=[{"role": "assistant", "content": welcome_text}],
        )

        with gr.Row():
            msg_input = gr.Textbox(
                placeholder="๐ญ Ask a question about your documents...",
                label="Your Question",
                lines=2,
                scale=4,
            )
            send_btn = gr.Button("๐จ Send", variant="primary", size="lg", scale=1)

        clear_chat_btn = gr.Button("๐งน Clear Chat", variant="secondary", size="sm")

        # The Send button and pressing Enter in the textbox do the same thing.
        for register in (send_btn.click, msg_input.submit):
            register(
                fn=respond,
                inputs=[msg_input, chatbot],
                outputs=[msg_input, chatbot],
            )

        # Clearing replaces the conversation with a fresh greeting.
        clear_chat_btn.click(
            fn=lambda: [{
                "role": "assistant",
                "content": "๐ **Welcome back!**\n\nI'm ready to help you with your PDF documents again. What would you like to know?"
            }],
            outputs=[chatbot],
        )


def _build_info_tab() -> None:
    """Lay out the static system-information tab.

    Must be called inside an active ``gr.Tabs()`` context.
    """
    settings_info = (
        "\n"
        f"**๐ง Embedding Model:** `{Config.EMBEDDING_MODEL}`\n"
        f"**๐ Chunk Size:** {Config.CHUNK_SIZE} characters\n"
        f"**๐ Chunk Overlap:** {Config.CHUNK_OVERLAP} characters\n"
        f"**๐ฏ Search Results:** Top {Config.TOP_K} most relevant chunks\n"
        "**๐ Max File Size:** 16MB per PDF\n"
    )
    features_info = (
        "\n"
        "โ Multiple PDF upload and processing\n"
        "โ Intelligent text chunking\n"
        "โ Vector similarity search using FAISS\n"
        "โ AI-powered Q&A with Google Gemini\n"
        "โ Source attribution with page numbers\n"
        "โ Persistent vector database storage\n"
        "โ Real-time chat interface\n"
        "โ Responsive modern UI\n"
    )
    guide_info = (
        "\n"
        "**1.** Upload Documents - Go to 'Document Management' tab and upload your PDF files\n"
        "**2.** Process & Index - Wait for the system to extract text and create embeddings\n"
        "**3.** Ask Questions - Switch to 'AI Assistant' tab and start asking questions\n"
        "**4.** Get Intelligent Answers - Receive detailed responses with source references and page numbers\n"
    )

    with gr.Tab("โน๏ธ System Information"):
        gr.Markdown("# โ๏ธ System Configuration & Information")

        with gr.Row():
            with gr.Column():
                gr.Markdown("## ๐ง Current Settings")
                gr.Markdown(settings_info)
            with gr.Column():
                gr.Markdown("## ๐ Key Features")
                gr.Markdown(features_info)

        gr.Markdown("## ๐ ๏ธ Technology Stack")
        with gr.Row():
            with gr.Column():
                gr.Markdown("**๐ฅ๏ธ Framework:** Gradio 4.44+")
                gr.Markdown("**๐ PDF Processing:** PyMuPDF")
            with gr.Column():
                gr.Markdown("**๐งฎ Embeddings:** Sentence Transformers")
                gr.Markdown("**๐๏ธ Vector Database:** FAISS")
            with gr.Column():
                gr.Markdown("**๐ค Language Model:** Google Gemini 1.5")

        gr.Markdown("## ๐ Quick Start Guide")
        gr.Markdown(guide_info)


def create_interface():
    """Build and return the Gradio Blocks app.

    The app has a header followed by three tabs, created in this order:
    document management, chat assistant, and system information.
    """
    with gr.Blocks(title="PDF RAG System") as interface:
        gr.Markdown("# ๐ค PDF RAG Assistant")
        gr.Markdown("Upload PDFs and ask intelligent questions about their content using AI")

        with gr.Tabs():
            _build_document_tab()
            _build_chat_tab()
            _build_info_tab()

    return interface
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on every network
    # interface at port 7860 without a public share link, with errors
    # shown in the UI.
    launch_options = dict(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
    )
    interface = create_interface()
    interface.launch(**launch_options)