# GPU image for the Python app started by `python3 app.py` (listens on 7860).
# Base: CUDA 12.1 + cuDNN 8 runtime on Ubuntu 22.04. CUDA 12.1 matches the
# PyTorch cu121 wheels installed below (a good fit for T4 GPUs).
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04

# ---- Basics & paths ---------------------------------------------------------
# Keep apt from prompting during the build.
ENV DEBIAN_FRONTEND=noninteractive

# Unprivileged account that owns /home/user. It is created before WORKDIR so
# that useradd builds the home directory itself. UID 1000 is the UID that
# managed hosts such as Hugging Face Spaces run containers as.
RUN useradd --create-home --uid 1000 user

WORKDIR /home/user/app

# Runtime/env hygiene + stable caches/dirs.
# NOTE(review): recent transformers releases warn that TRANSFORMERS_CACHE is
# deprecated in favour of HF_HOME; it is kept here for older versions - confirm
# against the version pinned in requirements.txt.
ENV OMP_NUM_THREADS=1 \
    TOKENIZERS_PARALLELISM=false \
    PYTHONUNBUFFERED=1 \
    HF_HOME=/home/user/.cache/huggingface \
    TRANSFORMERS_CACHE=/home/user/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/home/user/.cache/huggingface/datasets \
    OFFLOAD_DIR=/home/user/app/offload \
    MERGED_MODEL_DIR=/home/user/app/merged-model \
    QUANTIZE=4bit

# (Optional) helps some CUDA containers/tools detect GPU features.
ENV NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility

# ---- System deps ------------------------------------------------------------
# `git lfs install --system` writes the LFS filters to the system-wide git
# config, so they apply to the unprivileged runtime user and not only to root.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        python3 python3-pip git git-lfs ca-certificates && \
    rm -rf /var/lib/apt/lists/* && \
    git lfs install --system

# ---- Python deps ------------------------------------------------------------
# 1) Upgrade pip (no cache, consistent with the installs below).
RUN python3 -m pip install --no-cache-dir --upgrade pip

# 2) GPU-enabled PyTorch for CUDA 12.1 (good match for T4).
RUN python3 -m pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cu121 \
        torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0

# 3) Project requirements (should include bitsandbytes etc.).
#    Copied on its own so this layer is reused when only app code changes.
COPY requirements.txt .
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# ---- App code ---------------------------------------------------------------
COPY --chown=1000:1000 . .

# Create every configured cache/output directory (including the datasets
# cache) and hand the writable locations to the runtime user.
RUN mkdir -p "$OFFLOAD_DIR" "$MERGED_MODEL_DIR" \
        "$HF_HOME" "$TRANSFORMERS_CACHE" "$HF_DATASETS_CACHE" && \
    chown 1000:1000 /home/user /home/user/app /home/user/.cache && \
    chown -R 1000:1000 "$OFFLOAD_DIR" "$MERGED_MODEL_DIR" \
        "$HF_HOME" "$TRANSFORMERS_CACHE" "$HF_DATASETS_CACHE"

# Drop root for runtime; everything the app writes to lives under /home/user.
USER user

# ---- Networking -------------------------------------------------------------
EXPOSE 7860

# ---- Entrypoint -------------------------------------------------------------
CMD ["python3", "app.py"]