# Optimized Dockerfile for AI Knowledge Distillation Platform
# Configured for CPU-only training with memory constraints

FROM python:3.10-slim

# Python / pip settings that must persist into the running container:
#   PYTHONUNBUFFERED              - stdout/stderr are not buffered
#   PYTHONDONTWRITEBYTECODE       - no .pyc files are written
#   PIP_NO_CACHE_DIR              - pip keeps no download cache
#   PIP_DISABLE_PIP_VERSION_CHECK - pip skips its self-update check
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Build-time only: keeps apt/debconf from prompting during the RUN steps
# below. Declared as ARG (not ENV) so it is visible to RUN instructions in
# this stage but is not baked into the runtime environment of the image.
ARG DEBIAN_FRONTEND=noninteractive

# CPU-only execution settings.
# - CUDA_VISIBLE_DEVICES is set to the empty string (no CUDA device listed).
# - The four *_NUM_THREADS variables are all pinned to the same value, 8.
# - TOKENIZERS_PARALLELISM is switched off.
# - PYTORCH_CUDA_ALLOC_CONF is kept as-is; NOTE(review): it looks irrelevant
#   when no CUDA device is visible - confirm before removing.
ENV CUDA_VISIBLE_DEVICES="" \
    PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb:128" \
    TOKENIZERS_PARALLELISM="false" \
    OMP_NUM_THREADS="8" \
    MKL_NUM_THREADS="8" \
    OPENBLAS_NUM_THREADS="8" \
    NUMEXPR_NUM_THREADS="8"

# Cache locations, all under /app/cache (the directories themselves are
# created by a later RUN step in this file).
ENV HF_HOME="/app/cache/huggingface" \
    TRANSFORMERS_CACHE="/app/cache/transformers" \
    HF_DATASETS_CACHE="/app/cache/datasets"

# Install system dependencies (build toolchain, BLAS/LAPACK and image/crypto
# headers, plus the fetch tools used at build time and by the health check).
#
# --no-install-recommends keeps apt from pulling in optional packages that
# nothing here asks for. Two packages are listed explicitly because of that:
#   ca-certificates - so curl/wget/git keep working over HTTPS even though
#                     recommended packages are no longer installed
#   procps          - provides `free`, which the startup banner calls;
#                     NOTE(review): assumed absent from the slim base image
# update + install + list cleanup stay in one layer so the package index is
# never stale and never shipped. Packages are sorted for easier diffs.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    cmake \
    curl \
    git \
    libffi-dev \
    libfreetype6-dev \
    libjpeg-dev \
    liblapack-dev \
    libopenblas-dev \
    libpng-dev \
    libssl-dev \
    pkg-config \
    procps \
    wget \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged account (UID 1000, with a home directory) that the container
# switches to later; /app becomes the working directory for everything below.
RUN useradd --create-home --uid 1000 appuser
WORKDIR /app

# Pre-create the cache, database, log, model, backup, upload and temp
# directories under /app.
RUN for dir in \
        cache/datasets \
        cache/transformers \
        cache/huggingface \
        cache/medical_datasets \
        database \
        logs \
        models \
        backups \
        uploads \
        temp \
    ; do \
        mkdir -p "/app/${dir}" || exit 1; \
    done

# The dependency manifest is copied ahead of the source tree so the install
# layer below is reused when only application code changes.
COPY requirements.txt ./

# Three steps in one layer:
#   1. upgrade the packaging toolchain (pip, setuptools, wheel)
#   2. install torch / torchvision / torchaudio from the PyTorch CPU wheel index
#   3. install everything listed in requirements.txt
RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel \
 && python -m pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cpu \
        torch torchvision torchaudio \
 && python -m pip install --no-cache-dir --requirement requirements.txt

# Copy application code, already owned by appuser.
# Setting ownership at COPY time avoids a follow-up `chown -R /app`, which
# would rewrite every copied file into a second layer and roughly double the
# space the application tree takes in the image.
COPY --chown=appuser:appuser . .

# Hand over whatever under /app is still not appuser:appuser (the /app
# directory itself and anything created by earlier build steps). Only paths
# that actually need the change are touched, so the layer stays small while
# the end state matches a full recursive chown.
RUN find /app '(' ! -user appuser -o ! -group appuser ')' \
        -exec chown -h appuser:appuser {} +

# Switch to non-root user for all remaining build steps and at runtime.
USER appuser

# Create the startup script /app/start.sh (runs as appuser, which owns /app).
#
# The script prints a short banner (CPU count, available memory, dataset cache
# path, port) and then launches run_optimized.py.
#
# - printf '%s\n' writes one script line per argument, so generation does not
#   depend on whether the build shell's `echo` interprets "\n" escapes.
# - Everything is single-quoted so $(...) and $HF_DATASETS_CACHE are written
#   literally and evaluated when the container starts, not at build time.
#   The '"'"' sequences embed the single quotes that awk needs.
# - `exec` replaces the bash wrapper with the Python process, so the server
#   becomes PID 1 and receives SIGTERM from `docker stop` directly instead of
#   being killed after the stop timeout.
RUN printf '%s\n' '#!/bin/bash' \
    'echo "🚀 Starting AI Knowledge Distillation Platform (Optimized)"' \
    'echo "🔧 CPU Cores: $(nproc)"' \
    'echo "💾 Available Memory: $(free -h | grep Mem | awk '"'"'{print $7}'"'"')"' \
    'echo "📁 Cache Directory: $HF_DATASETS_CACHE"' \
    'echo "🌐 Starting server on port 7860..."' \
    'exec python run_optimized.py' \
    > /app/start.sh && chmod +x /app/start.sh

# Port the container documents as its service port.
EXPOSE 7860

# Liveness probe: after a 60s start period, request /health on port 7860 every
# 30s with a 10s timeout; three consecutive failures mark the container
# unhealthy. curl's --fail turns HTTP error responses into a non-zero exit.
HEALTHCHECK --start-period=60s --interval=30s --timeout=10s --retries=3 \
    CMD curl --fail http://localhost:7860/health || exit 1

# Default command (exec form): run the generated startup script.
CMD ["/app/start.sh"]

# Image metadata (free-form labels: description, feature list, maintainer, version).
LABEL description="Optimized AI Knowledge Distillation Platform for CPU-only training" \
      features="memory-management,cpu-optimization,medical-ai,token-management" \
      maintainer="AI Knowledge Distillation Team" \
      version="2.0.0"