# ResearchMate / Dockerfile
# Ananthakr1shnan's picture
# Updated files
# 356ac4f
FROM python:3.11-slim

# Hugging Face Spaces requires the runtime account to have UID 1000, so create
# a dedicated user with that ID, a home directory, and bash as login shell.
RUN useradd --uid 1000 --create-home --shell /bin/bash appuser

# All following relative paths (COPY destinations, CMD) resolve against /app.
WORKDIR /app
# Interpreter settings: do not write .pyc files and keep stdout/stderr
# unbuffered. DOCKER_ENV is presumably read by the application to detect that
# it runs inside a container -- confirm against app.py.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    DOCKER_ENV=true
# Install system dependencies.
#   build-essential: compiler toolchain, presumably for Python packages that
#                    build native extensions during `pip install`.
#   curl:            used by the HEALTHCHECK probe.
# --no-install-recommends keeps optional "recommended" packages out of the
# image; the apt package index is removed in the same layer so it never
# contributes to the image size.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
    && rm -rf /var/lib/apt/lists/*
# Bring in only the dependency manifest first so this layer stays cached until
# requirements.txt changes. Installation runs as root, before the USER switch.
COPY requirements.txt ./
RUN python -m pip install --no-cache-dir --upgrade pip \
    && python -m pip install --no-cache-dir --requirement requirements.txt
# Application storage locations, all rooted at /tmp/researchmate. They are
# declared ahead of the RUN steps below that invoke Python, and the directories
# themselves are created and handed to appuser further down in this file.
ENV DATA_DIR=/tmp/researchmate/data \
    LOGS_DIR=/tmp/researchmate/logs \
    CHROMA_DIR=/tmp/researchmate/chroma_persist \
    UPLOADS_DIR=/tmp/researchmate/uploads \
    CHROMA_DB_DIR=/tmp/researchmate/chroma_db \
    CONFIG_DIR=/tmp/researchmate/config \
    TEMP_DIR=/tmp/researchmate/tmp
# Redirect library caches (matplotlib, Hugging Face, sentence-transformers,
# datasets, generic XDG cache) to directories under /tmp.
# NOTE(review): recent transformers releases appear to prefer HF_HOME over
# TRANSFORMERS_CACHE -- confirm against the pinned version before removing it.
ENV MPLCONFIGDIR=/tmp/matplotlib \
    TRANSFORMERS_CACHE=/tmp/transformers \
    HF_HOME=/tmp/huggingface \
    SENTENCE_TRANSFORMERS_HOME=/tmp/sentence_transformers \
    HF_DATASETS_CACHE=/tmp/datasets \
    HUGGINGFACE_HUB_CACHE=/tmp/huggingface_hub \
    XDG_CACHE_HOME=/tmp/cache
# Further cache locations, kept under /tmp/cache so nothing falls back to
# /data; tokenizer parallelism is switched off.
ENV PYTORCH_KERNEL_CACHE_PATH=/tmp/cache \
    TORCH_HOME=/tmp/cache \
    NLTK_DATA=/tmp/cache/nltk_data \
    TOKENIZERS_PARALLELISM=false

# Catch-all for code that derives paths from the home or temp directory:
# HOME and TMPDIR both point at the /tmp locations used above.
ENV HOME=/tmp/cache \
    TMPDIR=/tmp/researchmate/tmp
# Warm the sentence-transformers cache at build time for both embedding models.
# Each download is best-effort: on failure a message is printed and the build
# carries on.
RUN for model in all-MiniLM-L6-v2 all-mpnet-base-v2; do \
        python -c "import os, sys; os.makedirs('/tmp/sentence_transformers', exist_ok=True); from sentence_transformers import SentenceTransformer; SentenceTransformer(sys.argv[1])" "$model" \
            || echo "Failed to download $model"; \
    done
# Create every directory the container writes to, then make the top-level
# trees world-writable and owned by appuser. The list of top-level trees is
# kept in one shell variable (deliberately expanded unquoted so it splits into
# separate arguments) and reused for mkdir, chmod and chown.
# NOTE(review): chmod -R 777 is broader than ownership by appuser requires;
# kept unchanged -- confirm whether the image must run under another UID.
RUN writable_roots="/tmp/researchmate \
        /tmp/matplotlib \
        /tmp/transformers \
        /tmp/huggingface \
        /tmp/sentence_transformers \
        /tmp/datasets \
        /tmp/huggingface_hub \
        /tmp/cache \
        /app/cache \
        /app/tmp" \
    && mkdir -p \
        /tmp/researchmate/data \
        /tmp/researchmate/logs \
        /tmp/researchmate/chroma_persist \
        /tmp/researchmate/uploads \
        /tmp/researchmate/chroma_db \
        /tmp/researchmate/config \
        /tmp/researchmate/tmp \
        /tmp/cache/nltk_data \
        $writable_roots \
    && chmod -R 777 $writable_roots \
    && chown -R appuser:appuser $writable_roots
# Copy application code, assigning ownership to appuser at copy time. A
# separate recursive chown would rewrite every copied file into a second
# layer and roughly double the space the application tree takes in the image.
COPY --chown=appuser:appuser . .
# /app itself was created by WORKDIR while running as root, and COPY --chown
# may leave an already-existing destination directory's owner untouched, so
# hand over just the directory entry (non-recursive, negligible layer cost).
RUN chown appuser:appuser /app
# Port the application is expected to listen on (the one Spaces uses).
EXPOSE 7860

# Everything from here on, including the running container, executes as the
# unprivileged appuser account.
USER appuser

# Liveness probe: request /health on the local port; an HTTP error status or a
# failed connection marks the check as failed.
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
    CMD curl --fail http://localhost:7860/health || exit 1

# Launch the application (exec form, so no intermediate shell is involved).
CMD ["python", "app.py"]