# Dockerfile for a Hugging Face Space (Space status when captured: Sleeping)
# Base image: the official slim Python 3.12 build.
FROM python:3.12-slim

# Every relative path in the instructions below resolves against /app.
WORKDIR /app
# Install the C toolchain and Tesseract OCR (engine, development headers and
# English language data) in one layer. The apt package lists are deleted in
# the same layer so they never end up in the image.
# NOTE(review): python3-dev provides headers for Debian's system Python, not
# for the image's own Python 3.12 — confirm it is still required.
RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc \
        libtesseract-dev \
        python3-dev \
        tesseract-ocr \
        tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*
# Create the unprivileged runtime account (UID 1000, with a home directory)
# and hand it ownership of the working directory.
RUN useradd --create-home --uid 1000 appuser \
    && chown -R appuser:appuser /app
# Copy only the project metadata first so the dependency layer below is
# reused from cache until pyproject.toml changes.
COPY pyproject.toml ./

# Upgrade pip, then install the project together with the dependencies it
# declares; --no-cache-dir keeps pip's download cache out of the layer.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir .
# Copy the rest of the build context into /app, owned by the runtime user
# from the moment it is written.
COPY --chown=appuser:appuser . .
# Create the scratch, upload and cache directories the application needs and
# give the runtime user ownership of the whole /app tree. The owner gets
# read/write access; group and others get read-only access instead of the
# world-writable 777 mode, which is unnecessary once appuser owns the files.
RUN mkdir -p \
        temp \
        uploads \
        /app/.cache \
        /app/nltk_data \
        /app/app/routers/temp \
        /app/app/config/temp \
    && chown -R appuser:appuser /app \
    && chmod -R u=rwX,go=rX \
        temp \
        uploads \
        /app/.cache \
        /app/nltk_data \
        /app/app/routers/temp \
        /app/app/config/temp
# Point the Hugging Face, PyTorch, NLTK and XDG caches at directories under
# /app, and record the Tesseract language-data directory and binary path.
# PATH is deliberately left unchanged: /usr/bin is already on it, and
# prepending it could put Debian's system python3 (a dependency of
# python3-dev) ahead of the image's interpreter in /usr/local/bin.
ENV HF_HOME=/app/.cache/huggingface \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    PYTORCH_PRETRAINED_BERT_CACHE=/app/.cache/torch \
    NLTK_DATA=/app/nltk_data \
    XDG_CACHE_HOME=/app/.cache \
    TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata \
    TESSERACT_CMD=/usr/bin/tesseract
# Fail the build early if the Tesseract binary is missing or cannot start.
RUN tesseract --version
# Drop root privileges; the container process runs as appuser.
USER appuser

# Document the port that Hugging Face Spaces expects the app to listen on.
EXPOSE 7860

# Start the ASGI application with Uvicorn on all interfaces, port 7860.
# Exec (JSON-array) form starts Uvicorn directly, without a wrapping shell.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]