# Base image with Python and llama-cpp dependencies
FROM python:3.11-slim

# Route Hugging Face caches to /tmp so the hub download works even when the
# container runs as a non-root user or with a read-only root filesystem.
ENV HF_HOME=/tmp/huggingface \
    HF_HUB_CACHE=/tmp/huggingface

# System dependencies for compiling llama-cpp-python.
# --no-install-recommends keeps the -slim image lean; removing the apt lists
# in the same RUN keeps the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    wget \
    git \
 && rm -rf /var/lib/apt/lists/*

# Python packages. llama-cpp-python is pinned for reproducible builds; the
# install happens before COPY so source edits don't invalidate this layer.
RUN pip install --no-cache-dir \
    llama-cpp-python==0.2.66 \
    fastapi \
    uvicorn \
    huggingface-hub \
    starlette

# Application code
WORKDIR /app
COPY . /app

# Model is downloaded from the Hugging Face Hub at container startup
# (handled by start.sh), not at build time — keeps the image small.
ENV MODEL_REPO=TheBloke/phi-2-GGUF \
    MODEL_FILE=phi-2.Q4_K_M.gguf

# start.sh is already in /app via `COPY . /app` above — the original's second
# `COPY start.sh .` was redundant and has been dropped; only ensure the
# entrypoint script is executable.
RUN chmod +x start.sh

# NOTE(review): uvicorn's default port — confirm against the bind address
# used in start.sh. EXPOSE is documentation-only and does not publish.
EXPOSE 8000

CMD ["./start.sh"]