# Page-header residue from the hosting UI ("Spaces: Running"); not a Dockerfile instruction.
# Base image: the slim variant of the official Python 3.10 image.
FROM python:3.10-slim
# Install system-level dependencies.
# update, install and list cleanup share one layer so the apt index is never
# reused stale from cache and never shipped in the image.
# Packages are listed alphabetically, one per line, to keep diffs small.
# NOTE(review): consider adding --no-install-recommends once it is confirmed
# that no recommended package (e.g. poppler-data) is needed at runtime.
RUN apt-get update && apt-get install -y \
        build-essential \
        curl \
        ffmpeg \
        git \
        libgl1 \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        poppler-utils \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*
# Verify Tesseract installation.
# Link the binary into /usr/local/bin as well, then print its version so the
# build fails at this step if the OCR engine is missing or broken.
# -f replaces an existing link instead of failing when one is already there.
RUN ln -sf /usr/bin/tesseract /usr/local/bin/tesseract && \
    tesseract --version
# Set environment variables.

# Python / pip: unbuffered stdout/stderr, and no pip download cache.
ENV PYTHONUNBUFFERED=1
ENV PIP_NO_CACHE_DIR=1

# Silence the huggingface_hub warning about unsupported cache symlinks.
ENV HF_HUB_DISABLE_SYMLINKS_WARNING=1

# UTF-8 locale for all processes in the container.
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8

# Absolute path of the tesseract binary.
# NOTE(review): presumably read by the application code; confirm in app.py.
# The previous `PATH="${TESSERACT_PATH}:${PATH}"` line was dropped: it put a
# file, not a directory, on PATH (which has no effect), and /usr/bin is
# already on PATH.
ENV TESSERACT_PATH=/usr/bin/tesseract
# Set working directory; relative paths in later instructions resolve here.
WORKDIR /home/user/app
# Install Python dependencies.
# Only requirements.txt is copied at this point, so this layer is rebuilt when
# the dependency list changes rather than on every source edit.
COPY requirements.txt .
RUN pip install --upgrade pip && pip install -r requirements.txt
# Copy application code (the whole build context) into the working directory.
COPY . .
# Download the NLTK "punkt" tokenizer data at build time.
# -d stores it in /usr/local/share/nltk_data, a system-wide directory on
# NLTK's default search path, rather than the build user's home directory, so
# the data is found whichever user the container runs as.
# NOTE(review): assumes nltk is installed via requirements.txt; recent NLTK
# releases may also need the "punkt_tab" resource. Confirm against the pinned
# version.
RUN python -m nltk.downloader -d /usr/local/share/nltk_data punkt
# Expose port for Gradio (documentation only; does not publish the port).
EXPOSE 7860
# Container health check: probe the HTTP port with curl, installed in the apt
# step of this Dockerfile.
# --start-period gives the app time to boot before failures count;
# -fsS makes curl fail on HTTP errors and stay quiet except for error messages.
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD curl -fsS http://localhost:7860 || exit 1
# Start the application (exec form, so python runs as PID 1 and receives
# stop signals directly).
CMD ["python", "app.py"]