# 1. Base Image
FROM python:3.10-slim

# 2. Install system dependencies required to compile llama-cpp-python,
# plus curl, which the HEALTHCHECK below relies on
RUN apt-get update && apt-get install -y \
    build-essential \
    cmake \
    curl \
    && rm -rf /var/lib/apt/lists/*

# 3. Set up a non-root user
RUN useradd -m -u 1000 user
USER user

# 4. Set Environment Variables & Working Directory
ENV HOME=/home/user
ENV PATH=$HOME/.local/bin:$PATH
WORKDIR $HOME/app

# 5. Copy requirements first for better Docker layer caching
COPY --chown=user requirements.txt .
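
# For context, a minimal requirements.txt consistent with this build would
# contain at least the two packages below (an assumption; the real file is
# not shown, and whatever web framework serves /health is also needed):
#   llama-cpp-python
#   huggingface_hub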

# 6. Set CMake flags, which pip reads while compiling llama-cpp-python.
# This is the key fix for the build timeout: disabling the cuBLAS and Metal
# backends forces a plain CPU build, so CMake skips GPU toolchain detection.
ENV CMAKE_ARGS="-DLLAMA_CUBLAS=OFF -DLLAMA_METAL=OFF"
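
# For reference, a GPU-enabled build would flip the flag instead; this is a
# sketch only, assuming an NVIDIA CUDA base image and a GPU-enabled runtime,
# neither of which this Dockerfile sets up:
#   ENV CMAKE_ARGS="-DLLAMA_CUBLAS=ON"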

# 7. Install Python dependencies for the non-root user
RUN pip install --no-cache-dir --user -r requirements.txt

# 8. Download the model at build time so the container can start without
# network access (assumes requirements.txt includes huggingface_hub, which
# provides the huggingface-cli entry point used here)
RUN huggingface-cli download Dnfs/gema-4b-indra10k-model1-Q4_K_M-GGUF \
    --local-dir ./model \
    --local-dir-use-symlinks False
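
# To keep the image smaller, a single GGUF file could be fetched instead of
# the whole repo; the filename below is hypothetical, so check the repo's
# file listing before using it:
#   RUN huggingface-cli download Dnfs/gema-4b-indra10k-model1-Q4_K_M-GGUF \
#       gema-4b-indra10k-model1-q4_k_m.gguf --local-dir ./model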

# 9. Copy the rest of the application code
COPY --chown=user app.py .

# 10. Expose the port the app runs on
EXPOSE 8000

# 11. Health check to ensure the app is running
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
  CMD curl -f http://localhost:8000/health || exit 1

# 12. Command to run the application
CMD ["python", "app.py"]