File size: 3,900 Bytes
99ee319
 
f20194e
 
 
99ee319
 
 
 
de8f515
99ee319
 
 
 
 
 
 
f20194e
99ee319
 
 
 
f20194e
9374c2e
 
 
 
 
f20194e
 
99ee319
de8f515
99ee319
de8f515
 
 
 
 
 
 
f20194e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de8f515
 
 
 
 
f20194e
 
 
 
 
 
de8f515
 
 
 
 
f20194e
 
 
 
 
 
 
 
 
 
 
 
 
99ee319
9374c2e
 
99ee319
f20194e
 
 
 
 
 
99ee319
 
 
 
 
9374c2e
99ee319
 
356ac4f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Base image: the slim variant of the official Python 3.11 image.
FROM python:3.11-slim

# Hugging Face Spaces requires a user with ID 1000. Create that account with
# a home directory and bash as its login shell; the image switches to it
# after all root-only build steps are done.
RUN useradd --uid 1000 --create-home --shell /bin/bash appuser

# All following relative paths (COPY targets, CMD) resolve against /app.
WORKDIR /app

# PYTHONDONTWRITEBYTECODE=1  -> Python does not write .pyc files.
# PYTHONUNBUFFERED=1         -> stdout/stderr are not buffered.
# DOCKER_ENV=true            -> presumably read by the application to detect
#                               that it runs in a container (TODO confirm).
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    DOCKER_ENV=true

# Install system dependencies: a compiler toolchain (build-essential) and
# curl, which the HEALTHCHECK at the end of this file invokes.
# --no-install-recommends keeps optional "recommended" packages out of the
# image. The apt package lists are removed in the same RUN so they are never
# stored in a layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies while the build still runs as root.
# Only requirements.txt is copied at this point, so edits to the application
# source alone do not invalidate this (slow) layer.
COPY requirements.txt ./
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir --requirement requirements.txt

# Application data locations, all under /tmp/researchmate.
# (Presumably read by the application's own configuration code; the consumer
# is not visible in this file.)
ENV DATA_DIR=/tmp/researchmate/data \
    LOGS_DIR=/tmp/researchmate/logs \
    CHROMA_DIR=/tmp/researchmate/chroma_persist \
    UPLOADS_DIR=/tmp/researchmate/uploads \
    CHROMA_DB_DIR=/tmp/researchmate/chroma_db \
    CONFIG_DIR=/tmp/researchmate/config \
    TEMP_DIR=/tmp/researchmate/tmp

# Cache directories for third-party libraries, redirected to writable
# locations under /tmp.
ENV MPLCONFIGDIR=/tmp/matplotlib \
    TRANSFORMERS_CACHE=/tmp/transformers \
    HF_HOME=/tmp/huggingface \
    SENTENCE_TRANSFORMERS_HOME=/tmp/sentence_transformers \
    HF_DATASETS_CACHE=/tmp/datasets \
    HUGGINGFACE_HUB_CACHE=/tmp/huggingface_hub \
    XDG_CACHE_HOME=/tmp/cache

# Further cache settings intended to keep libraries from touching /data,
# plus TOKENIZERS_PARALLELISM=false.
ENV PYTORCH_KERNEL_CACHE_PATH=/tmp/cache \
    TORCH_HOME=/tmp/cache \
    NLTK_DATA=/tmp/cache/nltk_data \
    TOKENIZERS_PARALLELISM=false

# Point HOME and TMPDIR at the same writable tree, overriding any paths
# derived from the default home or temp directory.
ENV HOME=/tmp/cache \
    TMPDIR=/tmp/researchmate/tmp

# Pre-download the two embedding models at build time.
# Each download is best-effort: if it fails, a message is printed and the
# build continues instead of aborting. The model name is passed to Python as
# a command-line argument (sys.argv[1]).
RUN for model in all-MiniLM-L6-v2 all-mpnet-base-v2; do \
        python -c "import os, sys; os.makedirs('/tmp/sentence_transformers', exist_ok=True); from sentence_transformers import SentenceTransformer; SentenceTransformer(sys.argv[1])" "$model" \
            || echo "Failed to download $model"; \
    done

# Create every directory referenced by the ENV settings above (plus
# /app/cache and /app/tmp) and hand them over to appuser.
#
# The container runs as appuser (see the USER instruction further down), so
# owner access is sufficient: directories and files are owner-writable and
# read-only for everyone else, rather than world-writable (777).
# "X" grants execute/search only on directories and on files that are
# already executable.
#
# chown/chmod are recursive so that anything written under these paths by
# earlier build steps (e.g. the pre-downloaded models) is covered as well.
RUN mkdir -p \
        /tmp/researchmate/data \
        /tmp/researchmate/logs \
        /tmp/researchmate/chroma_persist \
        /tmp/researchmate/uploads \
        /tmp/researchmate/chroma_db \
        /tmp/researchmate/config \
        /tmp/researchmate/tmp \
        /tmp/matplotlib \
        /tmp/transformers \
        /tmp/huggingface \
        /tmp/sentence_transformers \
        /tmp/datasets \
        /tmp/huggingface_hub \
        /tmp/cache \
        /tmp/cache/nltk_data \
        /app/cache \
        /app/tmp \
    && chown -R appuser:appuser \
        /tmp/researchmate \
        /tmp/matplotlib \
        /tmp/transformers \
        /tmp/huggingface \
        /tmp/sentence_transformers \
        /tmp/datasets \
        /tmp/huggingface_hub \
        /tmp/cache \
        /app/cache \
        /app/tmp \
    && chmod -R u=rwX,go=rX \
        /tmp/researchmate \
        /tmp/matplotlib \
        /tmp/transformers \
        /tmp/huggingface \
        /tmp/sentence_transformers \
        /tmp/datasets \
        /tmp/huggingface_hub \
        /tmp/cache \
        /app/cache \
        /app/tmp

# Copy application code, owned by appuser from the start.
# Setting ownership on COPY avoids a follow-up recursive chown, which would
# store a second copy of every application file in an extra layer.
COPY --chown=appuser:appuser . .

# /app itself was created by WORKDIR while the build ran as root; give the
# directory to appuser so the application can create files next to its code.
RUN chown appuser:appuser /app

# Drop root: the container process (and the health probe) run as appuser.
USER appuser

# Documented service port; Spaces uses 7860.
EXPOSE 7860

# Health probe: after a 40s start-up grace period, request /health every 30s
# with a 10s timeout; three consecutive failures mark the container unhealthy.
# curl --fail turns HTTP error responses into a non-zero exit status.
HEALTHCHECK --start-period=40s --interval=30s --timeout=10s --retries=3 \
    CMD curl --fail http://localhost:7860/health || exit 1

# Default process: run app.py (resolved relative to WORKDIR) with Python.
CMD ["python", "app.py"]