#!/usr/bin/env python3
"""
HuggingFace Spaces deployment wrapper for the Technical Documentation RAG Assistant.
This file serves as the main entry point for HuggingFace Spaces deployment,
with optimizations for cloud hosting and resource constraints.
Features:
- Automatic environment detection (HF Spaces vs local)
- Graceful fallbacks for missing dependencies
- Memory-optimized configuration
- Neural reranking and graph enhancement capabilities
"""
import os
import sys
from pathlib import Path
import streamlit as st
import subprocess
import time
import logging
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Configure for HuggingFace Spaces deployment
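# Headless mode skips the local browser launch; Spaces serves the app behind its
# own proxy, so Streamlit's CORS and XSRF protections are disabled here.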
os.environ["STREAMLIT_SERVER_HEADLESS"] = "true"
os.environ["STREAMLIT_SERVER_ENABLE_CORS"] = "false"
os.environ["STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION"] = "false"
# Setup cache directories for HuggingFace Spaces
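# /tmp is writable inside the Spaces container; the default caches under $HOME may not be.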
cache_base = "/tmp/.cache"
os.environ.setdefault("HF_HOME", f"{cache_base}/huggingface")
os.environ.setdefault("TRANSFORMERS_CACHE", f"{cache_base}/huggingface/transformers")
os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", f"{cache_base}/sentence-transformers")
# Create cache directories
for cache_dir in [
os.environ["HF_HOME"],
os.environ["TRANSFORMERS_CACHE"],
os.environ["SENTENCE_TRANSFORMERS_HOME"]
]:
try:
os.makedirs(cache_dir, exist_ok=True)
except Exception as e:
logger.warning(f"Could not create cache directory {cache_dir}: {e}")
# Environment detection: HuggingFace Spaces sets SPACE_ID in the container environment
IS_HF_SPACES = os.getenv("SPACE_ID") is not None
IS_LOCAL_DEV = not IS_HF_SPACES
# Add project root to path
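# (lets sibling modules such as streamlit_epic2_demo import without installation)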
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))
def check_environment_capabilities():
"""Check environment capabilities and suggest appropriate configuration."""
capabilities = {
"has_ollama": False,
"has_hf_token": False,
"memory_optimized": IS_HF_SPACES,
"recommended_config": "default"
}
# Check Ollama availability
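    # `which` only proves the binary exists; `ollama list` confirms the daemon
    # is reachable and that the target model has actually been pulled.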
try:
result = subprocess.run(['which', 'ollama'], capture_output=True, text=True, timeout=5)
if result.returncode == 0:
# Check if service is running and model available
result = subprocess.run(['ollama', 'list'], capture_output=True, text=True, timeout=10)
if result.returncode == 0 and 'llama3.2:3b' in result.stdout:
capabilities["has_ollama"] = True
logger.info("Ollama with llama3.2:3b detected")
    except Exception as e:  # Exception already covers subprocess.TimeoutExpired
logger.info(f"Ollama check failed or timed out: {e}")
# Check HuggingFace token availability
hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
if hf_token:
capabilities["has_hf_token"] = True
logger.info("HuggingFace token detected")
# Recommend configuration based on capabilities
if capabilities["has_hf_token"]:
capabilities["recommended_config"] = "epic2_hf_api"
elif capabilities["has_ollama"]:
capabilities["recommended_config"] = "epic2_graph_calibrated"
else:
capabilities["recommended_config"] = "default"
return capabilities
def setup_environment_display(capabilities):
"""Display environment status and configuration recommendations."""
    st.sidebar.markdown("### 🔧 Environment Status")
# Environment detection
if IS_HF_SPACES:
st.sidebar.success("π Running on HuggingFace Spaces")
else:
st.sidebar.info("π» Running locally")
# Capability status
if capabilities["has_ollama"]:
st.sidebar.success("β
Ollama + Llama 3.2 available")
else:
st.sidebar.warning("β οΈ Ollama not available")
if capabilities["has_hf_token"]:
st.sidebar.success("β
HuggingFace API available")
else:
st.sidebar.info("π‘ Add HF_TOKEN for API access")
# Configuration recommendation
config = capabilities["recommended_config"]
st.sidebar.markdown(f"**Recommended Config**: `{config}`")
# Setup instructions if needed
if not capabilities["has_ollama"] and not capabilities["has_hf_token"]:
st.sidebar.markdown("""
**Setup Options:**
1. **API Mode**: Set HF_TOKEN environment variable
2. **Local Mode**: Install Ollama + `ollama pull llama3.2:3b`
3. **Demo Mode**: Use mock configuration
""")
return capabilities
def setup_models_if_needed():
"""Setup models if needed for cloud deployment."""
try:
# Quick validation of critical dependencies
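        # rank_bm25 backs the keyword (sparse) side of hybrid search; pdfplumber backs PDF parsing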
import rank_bm25
import pdfplumber
logger.info("β
Critical dependencies available")
# Check if we need to setup spaCy model
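        # en_core_web_sm supplies the entity extraction behind graph enhancement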
try:
import spacy
spacy.load("en_core_web_sm")
logger.info("β
spaCy model available")
except OSError:
logger.info("π₯ Setting up spaCy model...")
            try:
result = subprocess.run([
sys.executable, "-m", "spacy", "download", "en_core_web_sm"
], capture_output=True, text=True, timeout=300)
if result.returncode == 0:
logger.info("β
spaCy model setup complete")
else:
logger.warning("β οΈ spaCy model setup failed - entity extraction may be limited")
except Exception as e:
logger.warning(f"β οΈ spaCy model auto-setup failed: {e}")
except ImportError:
logger.warning("β οΈ spaCy not available")
except ImportError as e:
logger.error(f"β Critical dependency missing: {e}")
st.error(f"Critical dependency missing: {e}")
st.info("Please install missing packages with: pip install -r requirements.txt")
st.stop()
def main():
"""Main application entry point with Enhanced RAG capabilities."""
# Page configuration
st.set_page_config(
page_title="Enhanced RISC-V RAG Demo",
page_icon="π",
layout="wide",
initial_sidebar_state="expanded"
)
# Setup models if needed
setup_models_if_needed()
# Check environment capabilities
capabilities = check_environment_capabilities()
setup_environment_display(capabilities)
# Main application header
st.title("π Enhanced RISC-V RAG")
st.markdown("""
**Advanced RAG System for Technical Documentation**
This system demonstrates advanced RAG capabilities with:
    - 🧠 **Neural reranking** with cross-encoder models
    - 📊 **Graph enhancement** for document relationships
    - 🔍 **Hybrid search** combining semantic and keyword matching
    - 🛠️ **Modular architecture** with 6 specialized components
""")
# Import and run the appropriate app based on capabilities
try:
if capabilities["has_hf_token"] or capabilities["has_ollama"]:
# Use Enhanced RAG demo with full capabilities
logger.info(f"Loading Enhanced RAG demo with config: {capabilities['recommended_config']}")
# Set configuration environment variable
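            # streamlit_epic2_demo is expected to read RAG_CONFIG at startup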
os.environ["RAG_CONFIG"] = f"config/{capabilities['recommended_config']}.yaml"
# Import and run Enhanced RAG demo
import streamlit_epic2_demo
            streamlit_epic2_demo.main()  # hand off to the full demo UI
else:
# Fallback to basic demo with mock capabilities
st.info("""
**Demo Mode Active** - Limited functionality without Ollama or HF API access.
**System Capabilities** (when properly configured):
- Multi-document PDF processing with advanced parsing
- Hybrid semantic + keyword search with BM25 + vector similarity
- Neural reranking with cross-encoder models
- Graph-enhanced document relationships
- Real-time performance metrics and source attribution
""")
# Show system architecture
st.markdown("### ποΈ System Architecture")
st.markdown("""
**6-Component Modular Architecture:**
1. **Platform Orchestrator** - System lifecycle management
2. **Document Processor** - PDF parsing and chunking
3. **Embedder** - Text vectorization with MPS acceleration
4. **Retriever** - Hybrid search with graph enhancement
5. **Answer Generator** - LLM-based response synthesis
6. **Query Processor** - Workflow orchestration
""")
# Show system features
col1, col2, col3 = st.columns(3)
with col1:
st.metric("Neural Reranking", "β
", delta="Cross-encoder")
with col2:
st.metric("Graph Enhancement", "β
", delta="Entity linking")
with col3:
st.metric("Architecture", "Modular", delta="6 components")
except ImportError as e:
st.error(f"Failed to import application modules: {e}")
st.info("Please ensure all dependencies are installed correctly.")
# Show installation guide
st.markdown("### π¦ Installation Guide")
st.code("""
# Install dependencies
pip install -r requirements.txt
# For local LLM (recommended)
ollama pull llama3.2:3b
# For API access (alternative)
export HF_TOKEN=your_token_here
""")
except Exception as e:
logger.error(f"Application error: {e}")
st.error(f"Application error: {e}")
st.info("Please check the logs for detailed error information.")
if __name__ == "__main__":
    main()