# Image for a Gradio application (app.py) that depends on llama-cpp-python,
# which is compiled from source against OpenBLAS for CPU inference.

# Slim Python base image keeps the resulting image small.
FROM python:3.11-slim

# All relative paths below resolve against /app.
WORKDIR /app

# Toolchain and libraries needed to compile llama-cpp-python from source.
# - build-essential already depends on g++/gcc/make, so g++ is not listed
#   separately.
# - libopenblas-dev supplies the OpenBLAS headers and library that the BLAS
#   build requested below links against; pkg-config helps CMake locate them.
# The apt package lists are removed in the same layer so they never end up in
# the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        libopenblas-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Critical step: build llama-cpp-python with the OpenBLAS CPU backend instead
# of the default build (the default build was observed to hit memory errors
# and stall).
# The package version is not pinned, so both spellings of the CMake options
# are passed: current releases read GGML_BLAS*, older ones read LLAMA_BLAS*.
# CMake only warns about -D options a given release does not use.
RUN CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" \
    pip install --no-cache-dir llama-cpp-python

# Install the remaining Python dependencies. requirements.txt is copied on its
# own first so this layer stays cached until the file itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code.
COPY . .

# Gradio binds to 127.0.0.1 unless told otherwise, which is unreachable
# through a published container port. This only sets the default; an explicit
# server_name passed in app.py still takes precedence.
ENV GRADIO_SERVER_NAME=0.0.0.0

# Port the Gradio app listens on (documentation only; publish with -p 7860:7860).
EXPOSE 7860

# Start the application (exec form, so the Python process receives signals directly).
CMD ["python", "app.py"]