# Multi-stage build for authenticated model downloads
FROM python:3.10-slim AS model-downloader

# Install the Hugging Face Hub client (provides huggingface-cli)
RUN pip install huggingface_hub

# Set working directory
WORKDIR /model-downloader

# Create directory for downloaded models
RUN mkdir -p /model-downloader/models

# This runs at image build time:
# pass your Hugging Face token with --build-arg HF_TOKEN=...
ARG HF_TOKEN
ENV HF_TOKEN=${HF_TOKEN}

# Log in and download the model checkpoint
RUN if [ -n "$HF_TOKEN" ]; then \
        huggingface-cli login --token ${HF_TOKEN}; \
        huggingface-cli download sesame/csm-1b ckpt.pt --local-dir /model-downloader/models; \
    else \
        echo "No HF_TOKEN provided, model download will be skipped"; \
    fi

# Main application stage
FROM nvidia/cuda:12.4.0-base-ubuntu22.04

# Set environment variables
ENV PYTHONFAULTHANDLER=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONHASHSEED=random \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_DEFAULT_TIMEOUT=100 \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6" \
    TORCH_NVCC_FLAGS="-Xfatbin -compress-all"

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
        python3-dev \
        ffmpeg \
        git \
        build-essential \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Create a non-root user and set up its environment
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set working directory
WORKDIR $HOME/app

# Copy requirements first for better layer caching
COPY --chown=user:user requirements.txt .

# Create persistent directories with the required permissions
RUN mkdir -p $HOME/app/static $HOME/app/models $HOME/app/voice_memories $HOME/app/voice_references \
        $HOME/app/voice_profiles $HOME/app/cloned_voices $HOME/app/audio_cache $HOME/app/tokenizers $HOME/app/logs && \
    chmod -R 755 $HOME/app && \
    chmod -R 777 $HOME/app/voice_references $HOME/app/voice_profiles $HOME/app/voice_memories \
        $HOME/app/cloned_voices $HOME/app/audio_cache $HOME/app/static $HOME/app/logs $HOME/app/tokenizers $HOME/app/models

# Copy static files
COPY --chown=user:user ./static $HOME/app/static

# Install core Python dependencies
RUN pip3 install --no-cache-dir --upgrade pip && \
    pip3 install torch torchaudio numpy

# Install torchao from source
RUN pip3 install git+https://github.com/pytorch/ao.git

# Install torchtune from source (main branch, which includes the llama3_2 model builders)
RUN git clone https://github.com/pytorch/torchtune.git /tmp/torchtune && \
    cd /tmp/torchtune && \
    git checkout main && \
    pip3 install -e .

# Install remaining dependencies
RUN pip3 install -r requirements.txt

# Install additional dependencies for streaming and voice cloning
RUN pip3 install yt-dlp openai-whisper

# Copy application code
COPY --chown=user:user ./app $HOME/app/app

# Copy the downloaded model from the model-downloader stage
COPY --chown=user:user --from=model-downloader /model-downloader/models $HOME/app/models

# Show the models available in torchtune (sanity check)
RUN python3 -c "import torchtune.models; print('Available models in torchtune:', dir(torchtune.models))"

# Expose port
EXPOSE 7860

# Command to run the application
CMD ["python3", "-m", "app.main"]
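
# Example usage (a sketch: the image tag "csm-tts" is an arbitrary placeholder, and the
# run command assumes the NVIDIA Container Toolkit is installed on the host):
#
#   docker build --build-arg HF_TOKEN=<your-hugging-face-token> -t csm-tts .
#   docker run --gpus all -p 7860:7860 csm-tts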