# Base image: the slim variant of the official Python 3.12 image
FROM python:3.12-slim

# All following relative paths (COPY destinations, mkdir targets) and the
# running application resolve against /app
WORKDIR /app

# System packages: a C compiler and Python development headers, plus the
# Tesseract OCR engine, its development library, and the English language data.
# The apt package lists are removed in the same layer so they never reach the image.
# NOTE(review): gcc and python3-dev are presumably needed to build Python
# packages with C extensions — confirm they are still required.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        gcc \
        libtesseract-dev \
        python3-dev \
        tesseract-ocr \
        tesseract-ocr-eng \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Add an unprivileged account (UID 1000, with a home directory) and make it
# the owner of the working directory
RUN useradd --create-home --uid 1000 appuser \
    && chown -R appuser:appuser /app

# Bring in only the project metadata first, so the dependency layer below is
# reused from cache until pyproject.toml itself changes
COPY pyproject.toml ./

# Upgrade pip, then install the project (and the dependencies it declares)
# without keeping pip's download cache in the layer.
# NOTE(review): only pyproject.toml is present in /app at this point, so this
# relies on the build backend accepting a tree without the package sources —
# confirm.
RUN python -m pip install --no-cache-dir --upgrade pip \
    && python -m pip install --no-cache-dir .

# Copy the rest of the application, assigning ownership to appuser at copy
# time. Using --chown here avoids a later recursive chown over all of /app,
# which would rewrite every copied file into an additional image layer.
COPY --chown=appuser:appuser . .

# Create the directories the application writes to at runtime (scratch and
# upload space, model/NLTK caches, per-package temp dirs) and hand them to
# appuser. `set --` stores the list once so mkdir/chown/chmod all operate on
# exactly the same paths.
# Only the owner is granted write access (u+rwX: read/write, plus traverse on
# directories). The container runs as appuser, so world-writable 777
# permissions are not needed.
# NOTE(review): if the image is ever started with a different UID
# (e.g. `docker run --user`), these directories will not be writable — confirm
# the runtime always uses appuser (UID 1000).
RUN set -- \
        /app/temp \
        /app/uploads \
        /app/.cache \
        /app/nltk_data \
        /app/app/routers/temp \
        /app/app/config/temp \
    && mkdir -p "$@" \
    && chown -R appuser:appuser "$@" \
    && chmod -R u+rwX "$@"

# Runtime environment.
# - HF_HOME / TRANSFORMERS_CACHE / PYTORCH_PRETRAINED_BERT_CACHE / XDG_CACHE_HOME
#   redirect library caches to /app/.cache, and NLTK_DATA to /app/nltk_data.
# - TESSDATA_PREFIX is the directory holding Tesseract's language data; the
#   path assumes the Debian Tesseract 5 package layout — re-check it if the
#   base image or Tesseract major version changes.
# - TESSERACT_CMD is presumably read by the application to locate the
#   binary — confirm against the app's configuration code.
# PATH is intentionally left as the base image defines it. /usr/bin is already
# on the default PATH, and prepending it would put the distribution's
# /usr/bin/python3 (pulled in by the python3-dev package) ahead of the image's
# own interpreter in /usr/local/bin, where the pip-installed packages live.
ENV HF_HOME=/app/.cache/huggingface \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    PYTORCH_PRETRAINED_BERT_CACHE=/app/.cache/torch \
    NLTK_DATA=/app/nltk_data \
    XDG_CACHE_HOME=/app/.cache \
    TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata \
    TESSERACT_CMD=/usr/bin/tesseract

# Build-time smoke test: the build fails here if the tesseract binary is
# missing from PATH or exits with an error
RUN ["tesseract", "--version"]

# Drop root privileges: everything from here on, including the running
# application, executes as appuser
USER appuser

# Document the port the server listens on (7860 is the port Hugging Face
# Spaces expects); EXPOSE itself does not publish the port
EXPOSE 7860

# Start the ASGI app `app` from module app.main with Uvicorn, listening on all
# interfaces on port 7860. Exec (JSON-array) form is used, so no shell wraps
# the server process.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]