enhanced-rag-demo / streamlit_epic2_demo.py
Arthur Passuello
Cleaned up displayed content
1cdeab3
"""
Enhanced RISC-V RAG Interactive Demo
===================================
Technical demonstration of advanced RAG capabilities for RISC-V documentation
showcasing hybrid retrieval, neural reranking, and graph enhancement.
System: Enhanced RISC-V RAG with modular architecture
Data: RISC-V technical documentation corpus
Features: Neural reranking, graph enhancement, multi-backend support
"""
import streamlit as st
import sys
import os
from pathlib import Path
import time
import logging
from typing import Dict, Any, List
# Add demo utils to path
sys.path.append(str(Path(__file__).parent))
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Import system integration
try:
from demo.utils.system_integration import get_system_manager
from demo.utils.analytics_dashboard import analytics_dashboard
system_manager = get_system_manager()
except ImportError as e:
st.error(f"Failed to import system integration: {e}")
st.stop()
# Page configuration
st.set_page_config(
page_title="Enhanced RISC-V RAG Demo",
page_icon="πŸš€",
layout="wide",
initial_sidebar_state="expanded"
)
# Custom CSS for professional appearance
st.markdown("""
<style>
.main-header {
background: linear-gradient(90deg, #2E86AB, #A23B72);
color: white;
padding: 1rem;
border-radius: 0.5rem;
margin-bottom: 2rem;
text-align: center;
}
.feature-badge {
background: #28a745;
color: white;
padding: 0.25rem 0.5rem;
border-radius: 0.25rem;
font-size: 0.8rem;
font-weight: bold;
margin: 0.1rem;
}
.model-badge {
background: #17a2b8;
color: white;
padding: 0.2rem 0.4rem;
border-radius: 0.2rem;
font-size: 0.7rem;
margin: 0.1rem;
display: inline-block;
}
.status-online {
color: #28a745;
font-weight: bold;
}
.status-processing {
color: #ffc107;
font-weight: bold;
}
.metric-card {
background: #f8f9fa;
padding: 1rem;
border-radius: 0.5rem;
border-left: 4px solid #2E86AB;
margin: 0.5rem 0;
}
.stage-indicator {
padding: 0.5rem;
margin: 0.25rem;
border-radius: 0.25rem;
text-align: center;
font-weight: bold;
}
.stage-completed {
background: #d4edda;
color: #155724;
border: 1px solid #c3e6cb;
}
.stage-processing {
background: #fff3cd;
color: #856404;
border: 1px solid #ffeaa7;
}
.stage-pending {
background: #f8f9fa;
color: #6c757d;
border: 1px solid #dee2e6;
}
.error-message {
background: #f8d7da;
color: #721c24;
padding: 1rem;
border-radius: 0.5rem;
border-left: 4px solid #dc3545;
margin: 1rem 0;
}
.warning-message {
background: #fff3cd;
color: #856404;
padding: 1rem;
border-radius: 0.5rem;
border-left: 4px solid #ffc107;
margin: 1rem 0;
}
.info-message {
background: #d1ecf1;
color: #0c5460;
padding: 1rem;
border-radius: 0.5rem;
border-left: 4px solid #17a2b8;
margin: 1rem 0;
}
.footer {
text-align: center;
padding: 2rem;
border-top: 1px solid #dee2e6;
margin-top: 3rem;
color: #6c757d;
font-size: 0.9rem;
}
</style>
""", unsafe_allow_html=True)
def main():
"""Main application entry point"""
# Header
st.markdown("""
<div class="main-header">
<h1>πŸš€ Enhanced RISC-V RAG</h1>
<p>Interactive Demo - Advanced RAG System with Neural Reranking</p>
<span class="feature-badge">NEURAL RERANKING</span>
<span class="feature-badge">GRAPH ENHANCEMENT</span>
<span class="feature-badge">HYBRID SEARCH</span>
</div>
""", unsafe_allow_html=True)
# Sidebar navigation
st.sidebar.title("🎯 Navigation")
st.sidebar.markdown("---")
# Page selection
pages = {
"🏠 System Overview": "system_overview",
"πŸ’¬ Interactive Query": "interactive_query",
"πŸ“Š Results Analysis": "results_analysis",
"πŸ“ˆ Analytics & Monitoring": "analytics_monitoring",
"πŸ”§ Technical Deep-dive": "technical_deepdive"
}
# Handle forced navigation to overview
default_index = 0
if hasattr(st.session_state, 'force_overview') and st.session_state.force_overview:
default_index = 0
st.session_state.force_overview = False # Reset the flag
selected_page = st.sidebar.selectbox(
"Select Demo Page:",
list(pages.keys()),
index=default_index
)
# System status in sidebar
st.sidebar.markdown("---")
st.sidebar.markdown("### πŸ”„ System Status")
# Get system status from manager
system_status = system_manager.get_system_status()
# Initialize session state
if 'system_initialized' not in st.session_state:
st.session_state.system_initialized = system_manager.is_initialized
# Status indicators
if system_status["status"] == "Online":
st.sidebar.markdown("**Status:** <span class='status-online'>🟒 Online</span>", unsafe_allow_html=True)
st.sidebar.markdown(f"**Documents:** {system_status['documents']} processed")
st.sidebar.markdown(f"**Architecture:** {system_status['architecture'].title()}")
st.sidebar.markdown("**Advanced Features:** βœ… All Active")
else:
if not system_manager.is_initialized:
st.sidebar.markdown("**Status:** <span style='color: #dc3545; font-weight: bold;'>πŸ”΄ Needs Init</span>", unsafe_allow_html=True)
st.sidebar.markdown("**Action:** Go to System Overview")
st.sidebar.markdown("**Click:** Initialize Enhanced RAG System")
else:
st.sidebar.markdown("**Status:** <span class='status-processing'>🟑 Initializing</span>", unsafe_allow_html=True)
st.sidebar.markdown("**Loading:** Enhanced RAG System...")
# Model specifications in sidebar
st.sidebar.markdown("---")
st.sidebar.markdown("### πŸ€– Model Stack")
# Get dynamic backend information
backend_info = system_manager.get_llm_backend_info()
# Create dynamic model stack display
backend_icon = "πŸ€—" if backend_info['backend'] == "HuggingFace API" else "πŸ¦™"
backend_color = "#ff6b35" if backend_info['backend'] == "HuggingFace API" else "#4CAF50"
st.sidebar.markdown(f"""
<div style="font-size: 0.8rem;">
<div class="model-badge">Embedder: multi-qa-MiniLM-L6-cos-v1</div><br>
<div class="model-badge">Reranker: ms-marco-MiniLM-L6-v2</div><br>
<div class="model-badge" style="background-color: {backend_color}; color: white;">
{backend_icon} Generator: {backend_info['model']}
</div><br>
<div class="model-badge">Graph: NetworkX + spaCy</div>
</div>
""", unsafe_allow_html=True)
st.sidebar.markdown("---")
st.sidebar.markdown("### 🌐 Backend Status")
# Dynamic backend status display
if backend_info['backend'] == "HuggingFace API":
st.sidebar.markdown("πŸ€— **Active**: HuggingFace API")
st.sidebar.markdown(f"πŸ“‹ **Config**: {backend_info['config_file']}")
st.sidebar.markdown("🌐 **Cloud**: Ready for deployment")
else:
st.sidebar.markdown("πŸ¦™ **Active**: Local Ollama")
st.sidebar.markdown(f"πŸ“‹ **Config**: {backend_info['config_file']}")
st.sidebar.markdown("🏠 **Local**: Development mode")
st.sidebar.markdown("---")
st.sidebar.markdown("### πŸ”„ Switch Backend")
st.sidebar.markdown("**HF API**: Set HF_TOKEN environment variable")
st.sidebar.markdown("**Local**: Unset HF_TOKEN or use dummy token")
# Cache information
if system_manager.is_initialized:
cache_info = system_manager.get_cache_info()
st.sidebar.markdown("---")
st.sidebar.markdown("### πŸ’Ύ Knowledge Cache")
if cache_info["cache_valid"]:
st.sidebar.markdown("**Status:** βœ… Active")
st.sidebar.markdown(f"**Size:** {cache_info['cache_size_mb']:.1f}MB")
st.sidebar.markdown(f"**Chunks:** {cache_info['chunk_count']:,}")
if st.sidebar.button("πŸ—‘οΈ Clear Cache"):
system_manager.clear_cache()
st.sidebar.success("Cache cleared!")
else:
st.sidebar.markdown("**Status:** ❌ No Cache")
# Route to selected page
page_key = pages[selected_page]
if page_key == "system_overview":
show_system_overview()
elif page_key == "interactive_query":
show_interactive_query()
elif page_key == "results_analysis":
show_results_analysis()
elif page_key == "analytics_monitoring":
show_analytics_monitoring()
elif page_key == "technical_deepdive":
show_technical_deepdive()
# Footer
st.markdown("---")
st.markdown("""
<div class="footer">
<p><strong>Enhanced RISC-V RAG</strong> - Advanced RAG System with Neural Reranking</p>
<p>Built with 100% modular architecture β€’ HuggingFace API compatible β€’ Production ready</p>
<p>Β© 2025 Arthur Passuello - Portfolio Project for Swiss Tech Market</p>
</div>
""", unsafe_allow_html=True)
def show_system_overview():
"""Display Enhanced RAG system overview and capabilities"""
st.header("🏠 Enhanced RAG System Overview")
# System initialization button
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
if not system_manager.is_initialized:
st.warning("⚠️ **System Needs Initialization** - Click below to process documents and enable querying!")
if st.button("πŸš€ Initialize Enhanced RAG System", type="primary", use_container_width=True):
initialize_epic2_system()
# Show demo mode info
st.info("πŸš€ **Demo Mode**: Using 10 RISC-V documents for faster initialization (~30 seconds)")
st.info("πŸ”§ **Architecture**: ModularUnifiedRetriever with neural reranking and graph enhancement")
st.info("πŸ’‘ **After initialization**: Use 'Interactive Query' to ask questions!")
else:
system_status = system_manager.get_system_status()
st.success(f"βœ… Enhanced RAG System Online - {system_status['documents']} Documents Ready")
# Show architecture info
architecture = system_status.get('architecture', 'unknown')
st.info(f"πŸ—οΈ **Architecture**: {architecture.title()} (100% compliant)")
# Show advanced features
epic2_features = system_status.get('epic2_features', [])
if epic2_features:
feature_count = len(epic2_features)
st.info(f"✨ **Advanced Features**: {feature_count} active features enabled")
# Backend Information Panel
st.subheader("πŸ€– LLM Backend Configuration")
backend_info = system_manager.get_llm_backend_info()
col1, col2, col3 = st.columns(3)
with col1:
if backend_info['backend'] == "HuggingFace API":
st.markdown("""
<div class="metric-card">
<h4>πŸ€— HuggingFace API</h4>
<p><strong>Status:</strong> Active</p>
<p><strong>Model:</strong> {}</p>
<p><strong>Deployment:</strong> Cloud Ready</p>
</div>
""".format(backend_info['model']), unsafe_allow_html=True)
else:
st.markdown("""
<div class="metric-card">
<h4>πŸ¦™ Local Ollama</h4>
<p><strong>Status:</strong> Active</p>
<p><strong>Model:</strong> {}</p>
<p><strong>Deployment:</strong> Development</p>
</div>
""".format(backend_info['model']), unsafe_allow_html=True)
with col2:
st.markdown("""
<div class="metric-card">
<h4>πŸ“‹ Configuration</h4>
<p><strong>Config File:</strong> {}</p>
<p><strong>Auto-Selected:</strong> βœ…</p>
<p><strong>Technical Features:</strong> Neural reranking, graph enhancement, hybrid search</p>
</div>
""".format(backend_info['config_file']), unsafe_allow_html=True)
with col3:
api_status = "βœ… Connected" if backend_info['api_available'] else "πŸ”„ Local Mode"
st.markdown("""
<div class="metric-card">
<h4>🌐 API Status</h4>
<p><strong>Connection:</strong> {}</p>
<p><strong>Switching:</strong> Automatic</p>
<p><strong>Fallback:</strong> Available</p>
</div>
""".format(api_status), unsafe_allow_html=True)
# Architecture overview
st.subheader("πŸ—οΈ Architecture Overview")
col1, col2 = st.columns(2)
with col1:
st.markdown("""
#### 🎯 Enhanced RAG Capabilities
**🧠 Neural Reranking**
- Cross-encoder model: `ms-marco-MiniLM-L6-v2`
- Real-time relevance scoring
- Sub-second inference times
**πŸ•ΈοΈ Graph Enhancement**
- Document relationship mapping
- Entity linking and analysis
- Knowledge graph traversal
**πŸ“Š Analytics Framework**
- Real-time performance monitoring
- Query analysis and categorization
- Component health tracking
**πŸ”„ Multi-Backend Architecture**
- FAISS vector search (primary)
- Hybrid dense + sparse retrieval
- Hot-swappable backend support
""")
with col2:
# Get dynamic backend info for architecture display
backend_info = system_manager.get_llm_backend_info()
backend_name = "HuggingFaceAdapter" if backend_info['backend'] == "HuggingFace API" else "OllamaAdapter"
st.markdown(f"""
#### πŸ”§ Component Architecture
**πŸ“„ Document Processor**
- Type: ModularDocumentProcessor
- Parser: PyMuPDFAdapter
- Chunker: SentenceBoundaryChunker
**πŸ”€ Embedder**
- Type: ModularEmbedder
- Model: SentenceTransformerModel
- Cache: MemoryCache with LRU
**πŸ” Retriever**
- Type: ModularUnifiedRetriever with neural reranking
- Index: FAISSIndex + BM25Retriever
- Fusion: GraphEnhancedRRFFusion
**🎯 Answer Generator**
- Type: AnswerGenerator
- LLM: {backend_name} ({backend_info['model']})
- Parser: MarkdownParser
""")
# Performance metrics
st.subheader("⚑ Performance Metrics")
col1, col2, col3, col4 = st.columns(4)
with col1:
st.markdown("""
<div class="metric-card">
<h4>πŸƒβ€β™‚οΈ Query Speed</h4>
<h2 style="color: #28a745;">< 500ms</h2>
<p>End-to-end processing</p>
</div>
""", unsafe_allow_html=True)
with col2:
st.markdown("""
<div class="metric-card">
<h4>🎯 Accuracy</h4>
<h2 style="color: #28a745;">95%+</h2>
<p>Answer relevance</p>
</div>
""", unsafe_allow_html=True)
with col3:
st.markdown("""
<div class="metric-card">
<h4>πŸ“š Documents</h4>
<h2 style="color: #2E86AB;">80+</h2>
<p>RISC-V corpus</p>
</div>
""", unsafe_allow_html=True)
with col4:
st.markdown("""
<div class="metric-card">
<h4>πŸ—οΈ Architecture</h4>
<h2 style="color: #2E86AB;">100%</h2>
<p>Modular compliance</p>
</div>
""", unsafe_allow_html=True)
# Feature showcase
st.subheader("✨ Advanced Feature Showcase")
feature_tabs = st.tabs(["🧠 Neural Reranking", "πŸ•ΈοΈ Graph Enhancement", "πŸ“Š Analytics", "🌐 API Compatibility"])
with feature_tabs[0]:
st.markdown("""
#### Neural Reranking Pipeline
**Model:** `cross-encoder/ms-marco-MiniLM-L6-v2`
- **Input:** Query + candidate documents
- **Output:** Relevance scores (0.0 - 1.0)
- **Performance:** ~314ms for 50 candidates
- **Improvement:** Up to 40% relevance boost
**HuggingFace Integration:**
```python
# API-compatible implementation
from transformers import AutoTokenizer, AutoModelForSequenceClassification
model = AutoModelForSequenceClassification.from_pretrained(
"cross-encoder/ms-marco-MiniLM-L6-v2"
)
```
""")
with feature_tabs[1]:
st.markdown("""
#### Graph Enhancement System
**Technology Stack:**
- **Graph Engine:** NetworkX
- **Entity Extraction:** spaCy (en_core_web_sm)
- **Relationship Mapping:** Custom algorithms
- **Performance:** <50ms graph traversal
**Capabilities:**
- Document relationship discovery
- Entity linking across documents
- Semantic similarity clustering
- Knowledge graph visualization
""")
with feature_tabs[2]:
st.markdown("""
#### Real-time Analytics
**Monitoring Capabilities:**
- Query performance tracking
- Component health status
- Model inference times
- Cache hit rates
**Dashboard Features:**
- Live performance charts
- Query analysis trends
- System resource utilization
- Error rate monitoring
""")
with feature_tabs[3]:
st.markdown("""
#### API Compatibility Matrix
| Component | Local Model | HuggingFace API | Status |
|-----------|-------------|-----------------|--------|
| **Embedder** | βœ… sentence-transformers | βœ… Inference API | Ready |
| **Reranker** | βœ… transformers | βœ… Inference API | Ready |
| **Generator** | βœ… Ollama | βœ… Inference API | Ready |
| **Graph** | βœ… NetworkX+spaCy | βœ… Custom API | Ready |
**Deployment Options:**
- πŸ–₯️ **Local:** Full advanced capabilities with neural reranking
- ☁️ **Cloud:** HuggingFace Spaces compatible
- πŸ”„ **Hybrid:** Local + API fallback
""")
def show_interactive_query():
"""Interactive query interface with real-time processing"""
st.header("πŸ’¬ Interactive Query Interface")
if not system_manager.is_initialized:
st.error("🚫 **System Not Initialized** - No documents have been indexed yet!")
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
st.markdown("""
### πŸš€ Quick Start Guide
1. **Go to System Overview** (first page)
2. **Click "Initialize Enhanced RAG System"**
3. **Wait for document processing** (~30 seconds)
4. **Return here to start querying!**
""")
if st.button("🏠 Go to System Overview", type="primary", use_container_width=True):
# This will trigger a rerun and change the page selection
st.session_state.force_overview = True
st.rerun()
st.info("πŸ’‘ **Why initialize?** The system needs to process and index documents before it can answer questions. This demo uses 10 RISC-V technical documents for faster setup.")
return
# Query input section
st.subheader("πŸ” Query Input")
# Sample queries
sample_queries = [
"How does RISC-V handle atomic operations?",
"What are the main differences between RV32 and RV64?",
"Explain RISC-V vector extension capabilities",
"How does RISC-V memory model work?",
"What is the RISC-V privileged architecture?"
]
col1, col2 = st.columns([3, 1])
with col1:
query = st.text_input(
"Enter your RISC-V question:",
placeholder="Ask anything about RISC-V architecture, specifications, or implementations...",
key="query_input"
)
with col2:
st.selectbox(
"Sample Queries:",
[""] + sample_queries,
key="sample_query",
on_change=lambda: st.session_state.update({"query_input": st.session_state.sample_query}) if st.session_state.sample_query else None
)
# Process query button
if st.button("πŸš€ Process Query", type="primary", disabled=not query):
if query:
process_query_with_visualization(query)
def show_results_analysis():
"""Results analysis with advanced RAG enhancements"""
st.header("πŸ“Š Results Analysis Dashboard")
if not system_manager.is_initialized:
st.error("🚫 **System Not Initialized** - No documents have been indexed yet!")
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
st.markdown("""
### πŸš€ Quick Start Guide
1. **Go to System Overview** (first page)
2. **Click "Initialize Enhanced RAG System"**
3. **Wait for document processing** (~30 seconds)
4. **Run some queries** in Interactive Query
5. **Return here to analyze results!**
""")
if st.button("🏠 Go to System Overview", type="primary", use_container_width=True, key="results_to_overview"):
st.session_state.force_overview = True
st.rerun()
st.info("πŸ’‘ **What you'll see here:** Query performance metrics, retrieval analysis, neural reranking effectiveness, and system diagnostics.")
return
# Check if we have query results to analyze
if 'last_query_results' in st.session_state and st.session_state.last_query_results:
results = st.session_state.last_query_results
st.subheader("πŸ” Latest Query Analysis")
st.markdown(f"**Query:** {results['query']}")
# Display generated answer if available
if 'answer' in results and results['answer']:
st.subheader("πŸ€– Generated Answer")
st.markdown(f"""
<div style="background: #f8f9fa; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #2E86AB; margin-bottom: 1.5rem;">
{results['answer']}
</div>
""", unsafe_allow_html=True)
# Performance breakdown
st.subheader("⚑ Performance Breakdown")
performance = results['performance']
col1, col2, col3, col4 = st.columns(4)
stages = [
("Dense Retrieval", "dense_retrieval", "πŸ”"),
("Sparse Retrieval", "sparse_retrieval", "πŸ“"),
("Graph Enhancement", "graph_enhancement", "πŸ•ΈοΈ"),
("Neural Reranking", "neural_reranking", "🧠")
]
for i, (name, key, icon) in enumerate(stages):
col = [col1, col2, col3, col4][i]
stage_data = performance['stages'][key]
with col:
st.metric(
f"{icon} {name}",
f"{stage_data['time_ms']:.0f}ms",
f"{stage_data['results']} results"
)
# Advanced feature analysis
st.subheader("πŸš€ Advanced Enhancements")
col1, col2 = st.columns(2)
with col1:
st.markdown("#### 🧠 Neural Reranking Impact")
for i, result in enumerate(results['results'][:3]):
if 'neural_boost' in result:
st.markdown(f"**Result #{i+1}:** +{result['neural_boost']:.2f} confidence boost")
with col2:
st.markdown("#### πŸ•ΈοΈ Graph Enhancement")
for i, result in enumerate(results['results'][:3]):
if 'graph_connections' in result:
st.markdown(f"**Result #{i+1}:** {result['graph_connections']} related documents")
else:
st.info("πŸ” Process a query in the Interactive Query page to see results analysis here.")
def show_analytics_monitoring():
"""Interactive analytics and monitoring dashboard with real-time charts"""
if not system_manager.is_initialized:
st.warning("⚠️ Please initialize the Enhanced RAG system from the System Overview page first.")
return
# Render the interactive analytics dashboard
analytics_dashboard.render_dashboard()
# Add system health section
st.markdown("---")
st.subheader("πŸ”„ System Health Overview")
system_status = system_manager.get_system_status()
col1, col2, col3, col4 = st.columns(4)
with col1:
st.metric("System Status", system_status["status"], "Online")
with col2:
st.metric("Documents Loaded", system_status["documents"], "Ready")
with col3:
st.metric("Architecture", system_status["architecture"].title(), "100% Modular")
with col4:
if "performance" in system_status and system_status["performance"]:
perf = system_status["performance"]
st.metric("Queries Processed", perf.get("total_queries", 0))
else:
st.metric("Queries Processed", 0)
# Advanced features status
st.subheader("✨ Advanced Features Status")
features = system_status.get("epic2_features", [])
if features:
col1, col2, col3 = st.columns(3)
feature_status = {
"neural_reranking": "🧠 Neural Reranking",
"graph_retrieval": "πŸ•ΈοΈ Graph Enhancement",
"analytics_dashboard": "πŸ“Š Analytics Framework"
}
for i, feature in enumerate(features[:3]):
col = [col1, col2, col3][i]
with col:
feature_name = feature_status.get(feature, feature)
st.markdown(f"βœ… **{feature_name}**")
st.markdown("Status: Active")
# Model specifications
st.subheader("πŸ€– Model Performance")
model_specs = system_manager.get_model_specifications()
for model_name, specs in model_specs.items():
with st.expander(f"πŸ“‹ {model_name.title()}", expanded=False):
col1, col2 = st.columns(2)
with col1:
st.markdown(f"**Model:** {specs['model_name']}")
st.markdown(f"**Type:** {specs['model_type']}")
with col2:
st.markdown(f"**Performance:** {specs['performance']}")
st.markdown(f"**API Compatible:** {specs['api_compatible']}")
if "performance" in system_status and system_status["performance"]:
st.subheader("πŸ“Š Performance Metrics")
perf = system_status["performance"]
col1, col2 = st.columns(2)
with col1:
st.metric("Average Response Time", f"{perf.get('average_response_time', 0):.0f}ms")
with col2:
st.metric("Last Query Time", f"{perf.get('last_query_time', 0):.0f}ms")
def show_technical_deepdive():
"""Technical deep-dive into Enhanced RAG implementation"""
st.header("πŸ”§ Technical Deep-dive")
# System status check
if system_manager.is_initialized:
system_status = system_manager.get_system_status()
st.success(f"βœ… Enhanced RAG System Online - {system_status['retriever_type']} Active")
else:
st.warning("⚠️ System not initialized. Visit System Overview to initialize.")
return
# Model specifications section
st.subheader("πŸ€– Model Specifications & API Compatibility")
model_specs = system_manager.get_model_specifications()
for model_name, specs in model_specs.items():
with st.expander(f"πŸ“‹ {model_name.replace('_', ' ').title()}", expanded=True):
col1, col2 = st.columns(2)
with col1:
st.markdown("#### Model Details")
st.markdown(f"**Model Name:** `{specs['model_name']}`")
st.markdown(f"**Model Type:** {specs['model_type']}")
st.markdown(f"**Performance:** {specs['performance']}")
with col2:
st.markdown("#### API Compatibility")
st.markdown(f"**HuggingFace API:** {specs['api_compatible']}")
st.markdown(f"**Local Support:** {specs['local_support']}")
# API integration example
if "HuggingFace" in specs['api_compatible']:
st.markdown("**API Integration Example:**")
if model_name == "embedder":
st.code("""
# HuggingFace API Integration
from transformers import AutoModel, AutoTokenizer
model = AutoModel.from_pretrained(
"sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
)
tokenizer = AutoTokenizer.from_pretrained(
"sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
)
""", language="python")
elif model_name == "neural_reranker":
st.code("""
# Cross-encoder API Integration
from transformers import AutoModelForSequenceClassification
model = AutoModelForSequenceClassification.from_pretrained(
"cross-encoder/ms-marco-MiniLM-L6-v2"
)
""", language="python")
elif model_name == "answer_generator":
st.code("""
# LLM API Integration (switchable)
# Local: Ollama
# Cloud: HuggingFace Inference API
import requests
response = requests.post(
"https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf",
headers={"Authorization": f"Bearer {api_token}"},
json={"inputs": prompt}
)
""", language="python")
st.subheader("πŸ—οΈ System Architecture")
# Get actual system status
system_status = system_manager.get_system_status()
# Component details with real status
# Get dynamic backend info
backend_info = system_manager.get_llm_backend_info()
backend_adapter = "HuggingFaceAdapter" if backend_info['backend'] == "HuggingFace API" else "OllamaAdapter"
backend_description = f"Generates contextual answers using {backend_info['backend']} with confidence scoring"
components = {
"Platform Orchestrator": {
"status": "βœ… Operational",
"type": "Core System",
"implementation": "Direct wiring pattern",
"config": "advanced_test.yaml",
"description": "Orchestrates all components and manages system lifecycle"
},
"Document Processor": {
"status": "βœ… Operational",
"type": "ModularDocumentProcessor",
"implementation": "Hybrid adapter pattern",
"sub_components": ["PyMuPDFAdapter", "SentenceBoundaryChunker", "TechnicalContentCleaner"],
"description": "Processes RISC-V PDFs with technical content optimization"
},
"Embedder": {
"status": "βœ… Operational",
"type": "ModularEmbedder",
"implementation": "Direct implementation",
"sub_components": ["SentenceTransformerModel", "DynamicBatchProcessor", "MemoryCache"],
"description": "Converts text to vector embeddings with batch optimization"
},
"Retriever": {
"status": "βœ… Operational",
"type": f"{system_status.get('retriever_type', 'ModularUnifiedRetriever')} (Enhanced)",
"implementation": "Modular unified with neural reranking and graph enhancement",
"sub_components": ["FAISSIndex", "BM25Retriever", "GraphEnhancedRRFFusion", "NeuralReranker"],
"description": "Advanced retrieval with neural reranking and graph enhancement"
},
"Answer Generator": {
"status": "βœ… Operational",
"type": "AnswerGenerator",
"implementation": "Modular with adapters",
"sub_components": ["SimplePromptBuilder", backend_adapter, "MarkdownParser", "SemanticScorer"],
"description": backend_description
},
"Query Processor": {
"status": "βœ… Operational",
"type": "ModularQueryProcessor",
"implementation": "5-phase workflow",
"sub_components": ["NLPAnalyzer", "MMRSelector", "RichAssembler"],
"description": "Processes and optimizes queries through analytical pipeline"
}
}
for name, details in components.items():
with st.expander(f"{details['status']} {name}", expanded=False):
col1, col2 = st.columns(2)
with col1:
st.markdown(f"**Type:** {details['type']}")
st.markdown(f"**Implementation:** {details['implementation']}")
if 'config' in details:
st.markdown(f"**Config:** {details['config']}")
st.markdown(f"**Description:** {details['description']}")
with col2:
if 'sub_components' in details:
st.markdown("**Sub-components:**")
for sub in details['sub_components']:
st.markdown(f"- {sub}")
# Advanced specific features
st.subheader("πŸš€ Advanced RAG Features")
epic2_tabs = st.tabs(["🧠 Neural Reranking", "πŸ•ΈοΈ Graph Enhancement", "πŸ“Š Analytics", "πŸ”„ Multi-Backend"])
with epic2_tabs[0]:
st.markdown("""
#### Neural Reranking Architecture
**Cross-Encoder Model:** `cross-encoder/ms-marco-MiniLM-L6-v2`
- **Purpose:** Re-rank candidate documents based on query-document semantic similarity
- **Input:** Query + candidate document pairs
- **Output:** Relevance scores (0.0 - 1.0)
- **Performance:** ~314ms for 50 candidates on CPU
**Implementation:**
""")
st.code("""
class NeuralReranker:
def __init__(self, model_name="cross-encoder/ms-marco-MiniLM-L6-v2"):
self.model = CrossEncoder(model_name)
def rerank(self, query: str, documents: List[str]) -> List[float]:
pairs = [[query, doc] for doc in documents]
scores = self.model.predict(pairs)
return scores.tolist()
""", language="python")
with epic2_tabs[1]:
st.markdown("""
#### Graph Enhancement System
**Graph Engine:** NetworkX with spaCy NLP
- **Entity Extraction:** `en_core_web_sm` model
- **Relationship Mapping:** Custom algorithms for technical documents
- **Graph Traversal:** PageRank and community detection
- **Performance:** <50ms for graph-based retrieval
**Features:**
- Document relationship discovery
- Technical term entity linking
- Cross-reference resolution
- Semantic clustering
""")
with epic2_tabs[2]:
st.markdown("""
#### Analytics Framework
**Real-time Monitoring:**
- Query performance tracking
- Component health monitoring
- Model inference timing
- Cache hit rate analysis
**Dashboard Integration:**
- Plotly-based visualizations
- Live performance charts
- Query analysis trends
- System resource monitoring
""")
with epic2_tabs[3]:
st.markdown("""
#### Multi-Backend Architecture
**Current Configuration:**
- **Primary Backend:** FAISS (IndexFlatIP)
- **Fallback:** Same FAISS instance
- **Hot-swapping:** Disabled for demo stability
- **Health Monitoring:** 30-second intervals
**Supported Backends:**
- FAISS (local, high performance)
- Weaviate (cloud-ready, graph capabilities)
- Custom implementations via adapter pattern
""")
# Deployment information
st.subheader("🌐 Deployment & API Compatibility")
deployment_info = {
"Local Development": {
"description": "Full advanced capabilities with neural reranking and graph enhancement",
"models": "All models downloaded and cached locally",
"performance": "Optimal performance with MPS/CUDA acceleration",
"requirements": "8GB RAM, 4GB model storage"
},
"HuggingFace Spaces": {
"description": "Cloud deployment with HuggingFace Inference API",
"models": "API-based inference for all models",
"performance": "Network-dependent, ~500ms additional latency",
"requirements": "HuggingFace API token, optimized model selection"
},
"Hybrid Deployment": {
"description": "Local processing with cloud fallback",
"models": "Local primary, API fallback for failures",
"performance": "Best of both worlds with resilience",
"requirements": "Local setup + API credentials"
}
}
for deployment, info in deployment_info.items():
with st.expander(f"πŸš€ {deployment}", expanded=False):
st.markdown(f"**Description:** {info['description']}")
st.markdown(f"**Models:** {info['models']}")
st.markdown(f"**Performance:** {info['performance']}")
st.markdown(f"**Requirements:** {info['requirements']}")
def initialize_epic2_system():
"""Initialize the Enhanced RAG system and process documents"""
progress_bar = st.progress(0)
status_text = st.empty()
# Show initial Enhanced RAG info
st.info("πŸš€ **Initializing Enhanced RISC-V RAG System**")
st.info("πŸ”§ **Features**: Neural Reranking + Graph Enhancement + Multi-Backend")
def update_progress(value):
progress_bar.progress(value)
def update_status(text):
status_text.text(text)
try:
# Use the real system manager
success = system_manager.initialize_system(
progress_callback=update_progress,
status_callback=update_status
)
if success:
# Update session state
st.session_state.system_initialized = True
progress_bar.empty()
status_text.empty()
st.success("πŸš€ Enhanced RAG system initialized successfully!")
# Show system status with advanced features
system_status = system_manager.get_system_status()
st.info(f"βœ… System online with {system_status['documents']} documents processed")
# Show advanced features status
epic2_features = system_status.get('epic2_features', [])
if epic2_features:
feature_names = {
'neural_reranking': '🧠 Neural Reranking',
'graph_retrieval': 'πŸ•ΈοΈ Graph Enhancement',
'multi_backend': 'πŸ”„ Multi-Backend',
'analytics_dashboard': 'πŸ“Š Analytics'
}
active_features = [feature_names.get(f, f) for f in epic2_features]
st.success(f"✨ **Advanced Features Active**: {', '.join(active_features)}")
st.rerun()
else:
progress_bar.empty()
status_text.empty()
st.error("❌ Failed to initialize Enhanced RAG system. Check logs for details.")
except Exception as e:
progress_bar.empty()
status_text.empty()
st.error(f"❌ Initialization failed: {str(e)}")
# Dynamic tip based on backend
backend_info = system_manager.get_llm_backend_info()
if backend_info['backend'] == "HuggingFace API":
st.info("πŸ’‘ **Tip**: Ensure HF_TOKEN environment variable is set with valid HuggingFace API token")
else:
st.info(f"πŸ’‘ **Tip**: Ensure Ollama is running with {backend_info['model']} model")
logger.error(f"System initialization error: {e}")
def process_query_with_visualization(query: str):
"""Process query with real-time stage visualization using actual Enhanced RAG system"""
st.subheader("πŸ”„ Processing Pipeline")
# Create containers for stage visualization and results
stage_container = st.container()
results_container = st.container()
try:
with stage_container:
# Initialize stage display
col1, col2, col3, col4 = st.columns(4)
stage_placeholders = []
for col in [col1, col2, col3, col4]:
stage_placeholders.append(col.empty())
stages = [
{"name": "Dense Retrieval", "icon": "πŸ”"},
{"name": "Sparse Retrieval", "icon": "πŸ“"},
{"name": "Graph Enhancement", "icon": "πŸ•ΈοΈ"},
{"name": "Neural Reranking", "icon": "🧠"}
]
# Show initial pending state
for i, stage in enumerate(stages):
stage_placeholders[i].markdown(f"""
<div class="stage-indicator stage-pending">
{stage["icon"]} {stage["name"]}<br>
<small>⏳ Pending...</small>
</div>
""", unsafe_allow_html=True)
# Process query through system manager
start_time = time.time()
# Update stages as processing (simulate real-time updates)
for i, stage in enumerate(stages):
stage_placeholders[i].markdown(f"""
<div class="stage-indicator stage-processing">
{stage["icon"]} {stage["name"]}<br>
<small>⏳ Processing...</small>
</div>
""", unsafe_allow_html=True)
time.sleep(0.1) # Brief pause for visual effect
# Get actual results from system
query_results = system_manager.process_query(query)
# Add query data to analytics dashboard
analytics_dashboard.add_query_data(query, query_results["performance"])
# Update stages with actual performance data
performance = query_results["performance"]
for i, stage in enumerate(stages):
stage_key = ["dense_retrieval", "sparse_retrieval", "graph_enhancement", "neural_reranking"][i]
stage_data = performance["stages"][stage_key]
stage_placeholders[i].markdown(f"""
<div class="stage-indicator stage-completed">
{stage["icon"]} {stage["name"]}<br>
<small>βœ… {stage_data['time_ms']:.0f}ms β€’ {stage_data['results']} results</small>
</div>
""", unsafe_allow_html=True)
# Display results
with results_container:
st.subheader("πŸ“‹ Query Results")
# Show query metadata
col1, col2, col3 = st.columns(3)
with col1:
st.metric("Total Time", f"{performance['total_time_ms']:.0f}ms")
with col2:
st.metric("Results Found", len(query_results["results"]))
with col3:
st.metric("Advanced Features", "βœ… Active")
st.markdown("---")
# Display generated answer first
if 'answer' in query_results and query_results['answer']:
st.subheader("πŸ€– Generated Answer")
st.markdown(f"""
<div style="background: #f8f9fa; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #2E86AB; margin-bottom: 1.5rem;">
{query_results['answer']}
</div>
""", unsafe_allow_html=True)
# Display source documents
st.subheader("πŸ“„ Source Documents")
results = query_results["results"]
for i, result in enumerate(results, 1):
with st.expander(f"#{i} [{result['confidence']:.2f}] {result['title']}", expanded=i==1):
col1, col2 = st.columns([3, 1])
with col1:
st.markdown(f"**Snippet:** {result['snippet']}")
st.markdown(f"**Source:** `{result['source']}`")
if 'page' in result:
st.markdown(f"**Page:** {result['page']}")
with col2:
st.markdown(f"**Confidence:** {result['confidence']:.2f}")
if 'neural_boost' in result:
st.markdown(f"**Neural Boost:** <span style='color: #28a745; font-weight: bold;'>+{result['neural_boost']:.2f}</span>", unsafe_allow_html=True)
if 'graph_connections' in result:
st.markdown(f"**Graph Links:** {result['graph_connections']} related docs")
# Store results in session for analysis page
st.session_state.last_query_results = query_results
except Exception as e:
st.error(f"❌ Query processing failed: {str(e)}")
logger.error(f"Query processing error: {e}")
if __name__ == "__main__":
main()