#!/usr/bin/env python3
"""
HuggingFace Spaces deployment wrapper for the Technical Documentation RAG Assistant.

This file serves as the main entry point for HuggingFace Spaces deployment,
with optimizations for cloud hosting and resource constraints.

Features:
- Automatic environment detection (HF Spaces vs local)
- Graceful fallbacks for missing dependencies
- Memory-optimized configuration
- Neural reranking and graph enhancement capabilities
"""

import os
import sys
from pathlib import Path
import streamlit as st
import subprocess
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configure for HuggingFace Spaces deployment
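# (headless mode, with CORS/XSRF protection relaxed, since Spaces serves the app behind its own proxy/iframe)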
os.environ["STREAMLIT_SERVER_HEADLESS"] = "true"
os.environ["STREAMLIT_SERVER_ENABLE_CORS"] = "false" 
os.environ["STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION"] = "false"

# Set up cache directories for HuggingFace Spaces
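# /tmp is writable in the Spaces container; these caches are ephemeral, so models are
# re-downloaded when the Space restarts (unless persistent storage is configured)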
cache_base = "/tmp/.cache"
os.environ.setdefault("HF_HOME", f"{cache_base}/huggingface")
os.environ.setdefault("TRANSFORMERS_CACHE", f"{cache_base}/huggingface/transformers")
os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", f"{cache_base}/sentence-transformers")

# Create cache directories
for cache_dir in [
    os.environ["HF_HOME"],
    os.environ["TRANSFORMERS_CACHE"], 
    os.environ["SENTENCE_TRANSFORMERS_HOME"]
]:
    try:
        os.makedirs(cache_dir, exist_ok=True)
    except Exception as e:
        logger.warning(f"Could not create cache directory {cache_dir}: {e}")

# Environment detection: HuggingFace Spaces sets SPACE_ID in the container environment
IS_HF_SPACES = os.getenv("SPACE_ID") is not None
IS_LOCAL_DEV = not IS_HF_SPACES

# Add project root to path
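# so local modules such as streamlit_epic2_demo are importable regardless of the working directory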
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))


def check_environment_capabilities():
    """Check environment capabilities and suggest appropriate configuration."""
    capabilities = {
        "has_ollama": False,
        "has_hf_token": False,
        "memory_optimized": IS_HF_SPACES,
        "recommended_config": "default"
    }
    
    # Check Ollama availability
    try:
        result = subprocess.run(['which', 'ollama'], capture_output=True, text=True, timeout=5)
        if result.returncode == 0:
            # Check if service is running and model available
            result = subprocess.run(['ollama', 'list'], capture_output=True, text=True, timeout=10)
            if result.returncode == 0 and 'llama3.2:3b' in result.stdout:
                capabilities["has_ollama"] = True
                logger.info("Ollama with llama3.2:3b detected")
    except Exception as e:  # covers subprocess.TimeoutExpired and a missing ollama binary
        logger.info(f"Ollama check failed or timed out: {e}")

    # Check HuggingFace token availability
    hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
    if hf_token:
        capabilities["has_hf_token"] = True
        logger.info("HuggingFace token detected")

    # Recommend configuration based on capabilities
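    # Each recommended name maps to a config/<name>.yaml file that main() points RAG_CONFIG at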
    if capabilities["has_hf_token"]:
        capabilities["recommended_config"] = "epic2_hf_api"
    elif capabilities["has_ollama"]:
        capabilities["recommended_config"] = "epic2_graph_calibrated" 
    else:
        capabilities["recommended_config"] = "default"
        
    return capabilities


def setup_environment_display(capabilities):
    """Display environment status and configuration recommendations."""
    
    st.sidebar.markdown("### πŸ”§ Environment Status")
    
    # Environment detection
    if IS_HF_SPACES:
        st.sidebar.success("🌐 Running on HuggingFace Spaces")
    else:
        st.sidebar.info("πŸ’» Running locally")
    
    # Capability status
    if capabilities["has_ollama"]:
        st.sidebar.success("βœ… Ollama + Llama 3.2 available")
    else:
        st.sidebar.warning("⚠️ Ollama not available")
        
    if capabilities["has_hf_token"]:
        st.sidebar.success("βœ… HuggingFace API available")
    else:
        st.sidebar.info("πŸ’‘ Add HF_TOKEN for API access")
    
    # Configuration recommendation
    config = capabilities["recommended_config"]
    st.sidebar.markdown(f"**Recommended Config**: `{config}`")
    
    # Setup instructions if needed
    if not capabilities["has_ollama"] and not capabilities["has_hf_token"]:
        st.sidebar.markdown("""
        **Setup Options:**
        1. **API Mode**: Set HF_TOKEN environment variable
        2. **Local Mode**: Install Ollama + `ollama pull llama3.2:3b`
        3. **Demo Mode**: Use mock configuration
        """)
    
    return capabilities


def setup_models_if_needed():
    """Setup models if needed for cloud deployment."""
    try:
        # Quick validation of critical dependencies
        import rank_bm25
        import pdfplumber
        logger.info("βœ… Critical dependencies available")
        
        # Check if we need to setup spaCy model
        try:
            import spacy
            spacy.load("en_core_web_sm")
            logger.info("βœ… spaCy model available")
        except OSError:
            logger.info("πŸ“₯ Setting up spaCy model...")
            try:
                result = subprocess.run([
                    sys.executable, "-m", "spacy", "download", "en_core_web_sm"
                ], capture_output=True, text=True, timeout=300)
                if result.returncode == 0:
                    logger.info("βœ… spaCy model setup complete")
                else:
                    logger.warning("⚠️ spaCy model setup failed - entity extraction may be limited")
            except Exception as e:
                logger.warning(f"⚠️ spaCy model auto-setup failed: {e}")
        except ImportError:
            logger.warning("⚠️ spaCy not available")
            
    except ImportError as e:
        logger.error(f"❌ Critical dependency missing: {e}")
        st.error(f"Critical dependency missing: {e}")
        st.info("Please install missing packages with: pip install -r requirements.txt")
        st.stop()


def main():
    """Main application entry point with Enhanced RAG capabilities."""
    
    # Page configuration
    st.set_page_config(
        page_title="Enhanced RISC-V RAG Demo",
        page_icon="πŸš€",
        layout="wide",
        initial_sidebar_state="expanded"
    )
    
    # Set up models if needed
    setup_models_if_needed()
    
    # Check environment capabilities
    capabilities = check_environment_capabilities()
    setup_environment_display(capabilities)
    
    # Main application header
    st.title("πŸš€ Enhanced RISC-V RAG")
    st.markdown("""
    **Advanced RAG System for Technical Documentation**
    
    This system demonstrates advanced RAG capabilities with:
    - 🧠 **Neural reranking** with cross-encoder models
    - πŸ”— **Graph enhancement** for document relationships  
    - πŸ” **Hybrid search** combining semantic and keyword matching
    - πŸ› οΈ **Modular architecture** with 6 specialized components
    """)
    
    # Import and run the appropriate app based on capabilities
    try:
        if capabilities["has_hf_token"] or capabilities["has_ollama"]:
            # Use Enhanced RAG demo with full capabilities
            logger.info(f"Loading Enhanced RAG demo with config: {capabilities['recommended_config']}")
            
            # Set configuration environment variable
            os.environ["RAG_CONFIG"] = f"config/{capabilities['recommended_config']}.yaml"
            
            # Import and run the Enhanced RAG demo entry point
            import streamlit_epic2_demo
            streamlit_epic2_demo.main()
            
        else:
            # Fallback to basic demo with mock capabilities
            st.info("""
            **Demo Mode Active** - Limited functionality without Ollama or HF API access.
            
            **System Capabilities** (when properly configured):
            - Multi-document PDF processing with advanced parsing
            - Hybrid semantic + keyword search with BM25 + vector similarity
            - Neural reranking with cross-encoder models
            - Graph-enhanced document relationships
            - Real-time performance metrics and source attribution
            """)
            
            # Show system architecture
            st.markdown("### πŸ—οΈ System Architecture")
            st.markdown("""
            **6-Component Modular Architecture:**
            1. **Platform Orchestrator** - System lifecycle management
            2. **Document Processor** - PDF parsing and chunking
            3. **Embedder** - Text vectorization with MPS acceleration  
            4. **Retriever** - Hybrid search with graph enhancement
            5. **Answer Generator** - LLM-based response synthesis
            6. **Query Processor** - Workflow orchestration
            """)
            
            # Show system features
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Neural Reranking", "βœ…", delta="Cross-encoder")
            with col2:
                st.metric("Graph Enhancement", "βœ…", delta="Entity linking")
            with col3:
                st.metric("Architecture", "Modular", delta="6 components")
        
    except ImportError as e:
        st.error(f"Failed to import application modules: {e}")
        st.info("Please ensure all dependencies are installed correctly.")
        
        # Show installation guide
        st.markdown("### πŸ“¦ Installation Guide")
        st.code("""
        # Install dependencies
        pip install -r requirements.txt
        
        # For local LLM (recommended)
        ollama pull llama3.2:3b
        
        # For API access (alternative)
        export HF_TOKEN=your_token_here
        """)
        
    except Exception as e:
        logger.error(f"Application error: {e}")
        st.error(f"Application error: {e}")
        st.info("Please check the logs for detailed error information.")


if __name__ == "__main__":
    main()