# Image for a Python application that depends on llama-cpp-python, built for
# CPU inference with OpenBLAS. The container starts `python app.py` and
# documents port 7860 (Gradio's default port).

# Use a modern, slim Python base image.
FROM python:3.11-slim

# All following relative paths (COPY targets, CMD) resolve against /app.
WORKDIR /app

# Install the toolchain needed to compile llama-cpp-python from source
# (C/C++ compiler, CMake, git) plus the OpenBLAS development files that the
# CMake flags below ask for; pkg-config lets CMake locate OpenBLAS.
# The apt package lists are removed in the same layer so they do not end up
# in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        g++ \
        git \
        libopenblas-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# This is the critical step:
# install llama-cpp-python while forcing a CPU build that uses OpenBLAS.
# (Per the original author, this avoids the memory errors and stalling seen
# with the default build process.)
# Both option spellings are passed because the package version is not pinned:
# current releases read GGML_BLAS*, older ones read LLAMA_BLAS*. CMake only
# warns about whichever pair it does not use.
# NOTE(review): consider pinning llama-cpp-python to a known-good version for
# reproducible builds.
RUN CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" \
    pip install --no-cache-dir llama-cpp-python

# Install the remaining dependencies. Copying requirements.txt on its own
# keeps this layer cached until the dependency list itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code.
# NOTE(review): make sure a .dockerignore excludes .git, virtualenvs, model
# files and secrets so they are not sent to the build context.
COPY . .

# Document the port Gradio runs on (EXPOSE does not publish the port).
# NOTE(review): the app must listen on 0.0.0.0 to be reachable from outside
# the container - confirm app.py sets server_name or GRADIO_SERVER_NAME.
EXPOSE 7860

# NOTE(review): the container runs as root; consider adding a dedicated
# non-root USER once the paths app.py writes to are known.

# The command to run the application (exec form, so python is PID 1 and
# receives stop signals directly).
CMD ["python", "app.py"]