# Container image for the application: Python 3.12 plus Tesseract OCR,
# served by Uvicorn on port 7860 as a non-root user.

# Use Python 3.12 as the base image
FROM python:3.12-slim

# Set working directory in the container
WORKDIR /app

# Install system dependencies including Tesseract OCR (engine, development
# headers, and English language data) together with gcc and python3-dev.
# The apt package lists are removed in the same layer so they are not kept
# in the image.
# NOTE(review): gcc/python3-dev are presumably here for Python packages that
# compile native extensions - confirm they are still required.
RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc \
        python3-dev \
        tesseract-ocr \
        libtesseract-dev \
        tesseract-ocr-eng \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Create a non-root user (UID 1000) and give it ownership of /app
RUN useradd -m -u 1000 appuser && \
    chown -R appuser:appuser /app

# Copy the project metadata first to leverage the Docker layer cache: the
# dependency layer below is only rebuilt when pyproject.toml changes.
COPY pyproject.toml .

# Install Python dependencies.
# NOTE(review): only pyproject.toml exists in /app at this point, so
# `pip install .` depends on the build backend succeeding without the source
# tree - confirm this holds for the project's backend.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir .

# Copy the rest of the application
COPY . .

# Create ALL writable directories the application needs (relative paths
# resolve under /app), hand the whole tree to appuser, and open the
# scratch/cache directories for writing.
# NOTE(review): mode 777 makes these directories world-writable; since they
# are owned by appuser a tighter mode may suffice - confirm before changing.
RUN mkdir -p temp uploads \
        /app/.cache \
        /app/nltk_data \
        /app/app/routers/temp \
        /app/app/config/temp && \
    chown -R appuser:appuser /app && \
    chmod -R 777 temp uploads /app/.cache /app/nltk_data /app/app/routers/temp /app/app/config/temp

# Set environment variables for cache directories and Tesseract.
# PATH is intentionally left untouched: /usr/bin (where tesseract lives) is
# already on the base image's PATH, and prepending it would put distro
# binaries in /usr/bin ahead of the image's own Python tools in
# /usr/local/bin.
# NOTE(review): TESSDATA_PREFIX hard-codes the Tesseract major version ("5");
# verify it still matches the package installed by the base image's distro.
ENV HF_HOME=/app/.cache/huggingface \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    PYTORCH_PRETRAINED_BERT_CACHE=/app/.cache/torch \
    NLTK_DATA=/app/nltk_data \
    XDG_CACHE_HOME=/app/.cache \
    TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata \
    TESSERACT_CMD=/usr/bin/tesseract

# Verify Tesseract installation (fails the build early if the binary is missing)
RUN tesseract --version

# Switch to non-root user
USER appuser

# Expose the port that Hugging Face Spaces expects
EXPOSE 7860

# Command to run the application using Uvicorn on port 7860
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]