# NVIDIA CUDA runtime base image (ONLY for GPU Spaces); Python is installed below
FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04

# Set working directory
WORKDIR /app

# Install Python and system dependencies
RUN apt-get update && apt-get install -y \
    python3.10 \
    python3-pip \
    python3.10-venv \
    python3.10-dev \
    build-essential \
    cmake \
    git \
    curl \
    wget \
    && rm -rf /var/lib/apt/lists/*

# Make python3.10 the default python/pip
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \
    update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1

# Upgrade pip
RUN pip install --no-cache-dir --upgrade pip

# Install llama-cpp-python from the prebuilt CUDA 12.1 wheel index.
# CMAKE_ARGS only takes effect if pip falls back to building from source;
# GGML_CUDA replaces the LLAMA_CUBLAS flag, which is a fatal CMake error
# in the llama.cpp bundled with this version.
RUN CMAKE_ARGS="-DGGML_CUDA=on" pip install --no-cache-dir \
    llama-cpp-python==0.2.90 \
    --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121

# Copy requirements.txt and install remaining dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Create a non-root user (UID 1000, as expected by Hugging Face Spaces)
RUN useradd -m -u 1000 user

# Copy app code, then create cache and models directories, all owned by the user
COPY --chown=user:user . .
RUN mkdir -p /app/.cache/huggingface models && chown -R user:user /app

# Switch to non-root user
USER user

# Environment variables
ENV GRADIO_SERVER_NAME="0.0.0.0"
ENV GRADIO_SERVER_PORT=7860
ENV CUDA_VISIBLE_DEVICES=0
ENV HF_HOME=/app/.cache/huggingface
ENV TRANSFORMERS_CACHE=/app/.cache/huggingface

# Expose Gradio port
EXPOSE 7860

# Start app
CMD ["python", "app.py"]
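
# --- Usage sketch (illustrative comments, not executed by the image) ---
# A minimal local build-and-run example, assuming the hypothetical image tag
# "gpu-space" and a host with an NVIDIA driver and the NVIDIA Container
# Toolkit installed; on Hugging Face Spaces the platform runs the build:
#
#   docker build -t gpu-space .
#   docker run --gpus all -p 7860:7860 gpu-space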