#!/usr/bin/env python3
"""
HuggingFace Spaces deployment wrapper for the Technical Documentation RAG Assistant.

This file serves as the main entry point for HuggingFace Spaces deployment,
with optimizations for cloud hosting and resource constraints.

Features:
- Automatic environment detection (HF Spaces vs local)
- Graceful fallbacks for missing dependencies
- Memory-optimized configuration
- Neural reranking and graph enhancement capabilities
"""
import logging
import os
import shutil
import subprocess
import sys
from pathlib import Path

import streamlit as st
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configure for HuggingFace Spaces deployment
os.environ["STREAMLIT_SERVER_HEADLESS"] = "true"
os.environ["STREAMLIT_SERVER_ENABLE_CORS"] = "false"
os.environ["STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION"] = "false"

# Setup cache directories for HuggingFace Spaces
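# The default cache location under the container user's home may not be
# writable on Spaces, so model caches are redirected to /tmp (ephemeral,
# but always writable).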
cache_base = "/tmp/.cache"
os.environ.setdefault("HF_HOME", f"{cache_base}/huggingface")
os.environ.setdefault("TRANSFORMERS_CACHE", f"{cache_base}/huggingface/transformers")
os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", f"{cache_base}/sentence-transformers")

# Create cache directories
for cache_dir in [
    os.environ["HF_HOME"],
    os.environ["TRANSFORMERS_CACHE"],
    os.environ["SENTENCE_TRANSFORMERS_HOME"],
]:
    try:
        os.makedirs(cache_dir, exist_ok=True)
    except Exception as e:
        logger.warning(f"Could not create cache directory {cache_dir}: {e}")
# Environment detection: HuggingFace Spaces sets SPACE_ID inside the container,
# so its presence distinguishes a Spaces deployment from a local run.
IS_HF_SPACES = os.getenv("SPACE_ID") is not None
IS_LOCAL_DEV = not IS_HF_SPACES

# Add project root to path
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))
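# With the repo root first on sys.path, the `import streamlit_epic2_demo`
# in main() resolves against this checkout regardless of the working directory.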

def check_environment_capabilities():
    """Check environment capabilities and suggest an appropriate configuration."""
    capabilities = {
        "has_ollama": False,
        "has_hf_token": False,
        "memory_optimized": IS_HF_SPACES,
        "recommended_config": "default",
    }
    # Check Ollama availability
    try:
        if shutil.which("ollama") is not None:
            # Check whether the service responds and the model is pulled
            result = subprocess.run(["ollama", "list"], capture_output=True, text=True, timeout=10)
            if result.returncode == 0 and "llama3.2:3b" in result.stdout:
                capabilities["has_ollama"] = True
                logger.info("Ollama with llama3.2:3b detected")
    except Exception as e:
        logger.info(f"Ollama check failed or timed out: {e}")
    # Check HuggingFace token availability
    hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
    if hf_token:
        capabilities["has_hf_token"] = True
        logger.info("HuggingFace token detected")

    # Recommend a configuration based on the detected capabilities
    if capabilities["has_hf_token"]:
        capabilities["recommended_config"] = "epic2_hf_api"
    elif capabilities["has_ollama"]:
        capabilities["recommended_config"] = "epic2_graph_calibrated"
    else:
        capabilities["recommended_config"] = "default"

    return capabilities
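
# Example (illustrative): on Spaces with HF_TOKEN set and no Ollama install,
# check_environment_capabilities() returns
# {"has_ollama": False, "has_hf_token": True,
#  "memory_optimized": True, "recommended_config": "epic2_hf_api"}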

def setup_environment_display(capabilities):
    """Display environment status and configuration recommendations."""
    st.sidebar.markdown("### 🔧 Environment Status")

    # Environment detection
    if IS_HF_SPACES:
        st.sidebar.success("🚀 Running on HuggingFace Spaces")
    else:
        st.sidebar.info("💻 Running locally")

    # Capability status
    if capabilities["has_ollama"]:
        st.sidebar.success("✅ Ollama + Llama 3.2 available")
    else:
        st.sidebar.warning("⚠️ Ollama not available")

    if capabilities["has_hf_token"]:
        st.sidebar.success("✅ HuggingFace API available")
    else:
        st.sidebar.info("💡 Add HF_TOKEN for API access")

    # Configuration recommendation
    config = capabilities["recommended_config"]
    st.sidebar.markdown(f"**Recommended Config**: `{config}`")

    # Setup instructions if needed
    if not capabilities["has_ollama"] and not capabilities["has_hf_token"]:
        st.sidebar.markdown("""
        **Setup Options:**
        1. **API Mode**: Set HF_TOKEN environment variable
        2. **Local Mode**: Install Ollama + `ollama pull llama3.2:3b`
        3. **Demo Mode**: Use mock configuration
        """)

    return capabilities

def setup_models_if_needed():
    """Set up models if needed for cloud deployment."""
    try:
        # Quick validation of critical dependencies
        import rank_bm25
        import pdfplumber
        logger.info("✅ Critical dependencies available")

        # Check whether the spaCy model needs to be installed
        try:
            import spacy
            spacy.load("en_core_web_sm")
            logger.info("✅ spaCy model available")
        except OSError:
            logger.info("📥 Setting up spaCy model...")
            try:
                result = subprocess.run(
                    [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
                    capture_output=True, text=True, timeout=300,
                )
                if result.returncode == 0:
                    logger.info("✅ spaCy model setup complete")
                else:
                    logger.warning("⚠️ spaCy model setup failed - entity extraction may be limited")
            except Exception as e:
                logger.warning(f"⚠️ spaCy model auto-setup failed: {e}")
        except ImportError:
            logger.warning("⚠️ spaCy not available")
    except ImportError as e:
        logger.error(f"❌ Critical dependency missing: {e}")
        st.error(f"Critical dependency missing: {e}")
        st.info("Please install missing packages with: pip install -r requirements.txt")
        st.stop()
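
# main() wires everything together: Streamlit page config, dependency checks,
# capability detection, then dispatch to the full Epic 2 demo or a reduced
# demo mode when neither Ollama nor an HF token is available.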
def main():
    """Main application entry point with Enhanced RAG capabilities."""
    # Page configuration
    st.set_page_config(
        page_title="Enhanced RISC-V RAG Demo",
        page_icon="🚀",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    # Setup models if needed
    setup_models_if_needed()

    # Check environment capabilities
    capabilities = check_environment_capabilities()
    setup_environment_display(capabilities)

    # Main application header
    st.title("🚀 Enhanced RISC-V RAG")
    st.markdown("""
    **Advanced RAG System for Technical Documentation**

    This system demonstrates advanced RAG capabilities with:
    - 🧠 **Neural reranking** with cross-encoder models
    - 🔗 **Graph enhancement** for document relationships
    - 🔍 **Hybrid search** combining semantic and keyword matching
    - 🛠️ **Modular architecture** with 6 specialized components
    """)
    # Import and run the appropriate app based on capabilities
    try:
        if capabilities["has_hf_token"] or capabilities["has_ollama"]:
            # Use the Enhanced RAG demo with full capabilities
            logger.info(f"Loading Enhanced RAG demo with config: {capabilities['recommended_config']}")

            # Point the demo at the recommended config; the demo module is
            # expected to read RAG_CONFIG when it initializes the system
            os.environ["RAG_CONFIG"] = f"config/{capabilities['recommended_config']}.yaml"

            # Import and run the Enhanced RAG demo
            import streamlit_epic2_demo
            streamlit_epic2_demo.main()
        else:
            # Fall back to a basic demo with mock capabilities
            st.info("""
            **Demo Mode Active** - Limited functionality without Ollama or HF API access.

            **System Capabilities** (when properly configured):
            - Multi-document PDF processing with advanced parsing
            - Hybrid semantic + keyword search with BM25 + vector similarity
            - Neural reranking with cross-encoder models
            - Graph-enhanced document relationships
            - Real-time performance metrics and source attribution
            """)

            # Show system architecture
            st.markdown("### 🏗️ System Architecture")
            st.markdown("""
            **6-Component Modular Architecture:**
            1. **Platform Orchestrator** - System lifecycle management
            2. **Document Processor** - PDF parsing and chunking
            3. **Embedder** - Text vectorization with MPS acceleration
            4. **Retriever** - Hybrid search with graph enhancement
            5. **Answer Generator** - LLM-based response synthesis
            6. **Query Processor** - Workflow orchestration
            """)

            # Show system features
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Neural Reranking", "✅", delta="Cross-encoder")
            with col2:
                st.metric("Graph Enhancement", "✅", delta="Entity linking")
            with col3:
                st.metric("Architecture", "Modular", delta="6 components")
    except ImportError as e:
        st.error(f"Failed to import application modules: {e}")
        st.info("Please ensure all dependencies are installed correctly.")

        # Show installation guide
        st.markdown("### 📦 Installation Guide")
        st.code("""
        # Install dependencies
        pip install -r requirements.txt

        # For local LLM (recommended)
        ollama pull llama3.2:3b

        # For API access (alternative)
        export HF_TOKEN=your_token_here
        """)
    except Exception as e:
        logger.error(f"Application error: {e}")
        st.error(f"Application error: {e}")
        st.info("Please check the logs for detailed error information.")

if __name__ == "__main__":
    main()
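
# Local usage (a sketch; assumes this file is saved as app.py, the default
# Spaces entry point, and that requirements.txt is installed):
#   streamlit run app.py
# On HuggingFace Spaces with the Streamlit SDK, the entry point is launched
# automatically when the Space starts.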