# Optimized Dockerfile for AI Knowledge Distillation Platform
# Configured for CPU-only training with memory constraints
FROM python:3.10-slim
# Set environment variables for optimization
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    DEBIAN_FRONTEND=noninteractive
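# PYTHONUNBUFFERED streams logs to the container console immediately; the pip
# flags keep image layers free of wheel caches and version-check noise.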
# CPU optimization environment variables
ENV OMP_NUM_THREADS=8 \
    MKL_NUM_THREADS=8 \
    NUMEXPR_NUM_THREADS=8 \
    OPENBLAS_NUM_THREADS=8 \
    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 \
    TOKENIZERS_PARALLELISM=false \
    CUDA_VISIBLE_DEVICES=""
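# Note: the thread counts above assume ~8 available cores; tune them to the
# host. PYTORCH_CUDA_ALLOC_CONF is inert while CUDA_VISIBLE_DEVICES="" hides
# all GPUs, but it is harmless to keep.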
# Cache directories
ENV HF_DATASETS_CACHE=/app/cache/datasets \
    TRANSFORMERS_CACHE=/app/cache/transformers \
    HF_HOME=/app/cache/huggingface
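# Note: newer transformers releases deprecate TRANSFORMERS_CACHE in favor of
# HF_HOME / HF_HUB_CACHE; it is still honored here for older versions.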
# Install system dependencies (--no-install-recommends keeps the layer lean)
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    git \
    wget \
    curl \
    libopenblas-dev \
    liblapack-dev \
    libffi-dev \
    libssl-dev \
    libjpeg-dev \
    libpng-dev \
    libfreetype6-dev \
    pkg-config \
    && rm -rf /var/lib/apt/lists/*
# Create app directory and user
RUN useradd -m -u 1000 appuser
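# UID 1000 follows the non-root user convention recommended for Hugging Face
# Spaces (consistent with the default Spaces port 7860 used below).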
WORKDIR /app
# Create necessary directories
RUN mkdir -p \
    /app/cache/datasets \
    /app/cache/transformers \
    /app/cache/huggingface \
    /app/cache/medical_datasets \
    /app/database \
    /app/logs \
    /app/models \
    /app/backups \
    /app/uploads \
    /app/temp
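# These match the cache ENV paths above; creating them before the chown below
# guarantees appuser can write to them at runtime.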
# Copy requirements first for better caching
COPY requirements.txt .
# Install Python dependencies with optimizations
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
    pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu && \
    pip install --no-cache-dir -r requirements.txt
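# The CPU wheel index avoids pulling CUDA-enabled torch builds (several GB
# larger); requirements.txt then reuses the preinstalled torch, provided it
# pins a compatible version.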
# Copy application code
COPY . .
# Set ownership to appuser
RUN chown -R appuser:appuser /app
# Switch to non-root user
USER appuser
# Create startup script (printf writes literal lines; avoids relying on
# shell-specific echo escape handling)
RUN printf '%s\n' \
    '#!/bin/bash' \
    'echo "🚀 Starting AI Knowledge Distillation Platform (Optimized)"' \
    'echo "🔧 CPU Cores: $(nproc)"' \
    'echo "💾 Available Memory: $(free -h | grep Mem | awk '\''{print $7}'\'')"' \
    'echo "📁 Cache Directory: $HF_DATASETS_CACHE"' \
    'echo "🌐 Starting server on port 7860..."' \
    'python run_optimized.py' \
    > /app/start.sh && chmod +x /app/start.sh
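# run_optimized.py is assumed to bind 0.0.0.0:7860; binding only to localhost
# would leave the published port unreachable from outside the container.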
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1
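# Assumes the app serves GET /health on port 7860; adjust the path if
# run_optimized.py exposes a different endpoint.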
# Expose port
EXPOSE 7860
# Set default command
CMD ["/app/start.sh"]
# Labels for metadata
LABEL maintainer="AI Knowledge Distillation Team" \
      version="2.0.0" \
      description="Optimized AI Knowledge Distillation Platform for CPU-only training" \
      features="memory-management,cpu-optimization,medical-ai,token-management"
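
# Example build/run (image name and resource limits are illustrative):
#   docker build -f Dockerfile.optimized -t distillation-platform .
#   docker run --rm -p 7860:7860 --cpus=8 --memory=16g distillation-platform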