# SmartManuals-AI / Dockerfile
# Author: damoojeje
# Commit: 380fa4c (verified) - "Update Dockerfile"
# Base image: the slim Python 3.10 variant.
FROM python:3.10-slim

# System-level dependencies, installed in a single layer.
#   - tesseract-ocr provides the `tesseract` binary used later in this file.
#   - curl is needed by the HEALTHCHECK instruction.
#   - ca-certificates is listed explicitly because --no-install-recommends
#     stops it from being pulled in as a recommendation of curl/git.
#   - libgl1, libglib2.0-0, libsm6, libxext6, libxrender-dev, poppler-utils,
#     ffmpeg, git and build-essential are presumably required by packages in
#     requirements.txt -- TODO confirm and drop any that are unused.
# --no-install-recommends keeps optional packages out of the image, and the
# apt lists are removed in the same layer so they never reach the image.
# Packages are sorted alphabetically to keep diffs small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        ffmpeg \
        git \
        libgl1 \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        poppler-utils \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*
# Make the Tesseract binary reachable under /usr/local/bin as well, then
# print its version so the build fails here if the binary cannot run.
RUN ln -s /usr/bin/tesseract /usr/local/bin/tesseract \
    && tesseract --version
# Runtime environment, grouped into a single instruction.
#   PYTHONUNBUFFERED=1                 unbuffered stdout/stderr so logs appear immediately
#   PIP_NO_CACHE_DIR=1                 pip does not keep a download cache in the image
#   HF_HUB_DISABLE_SYMLINKS_WARNING=1  silences the huggingface_hub symlink warning
#   LANG / LC_ALL = C.UTF-8            UTF-8 locale for text handling
#   TESSERACT_PATH                     absolute path of the Tesseract binary; presumably
#                                      read by the application code -- TODO confirm
# The original file also prepended ${TESSERACT_PATH} to PATH. That value is a
# file, not a directory, so the entry could never match during command lookup;
# it is dropped here. `tesseract` is still found by name, as the earlier
# `tesseract --version` step (which ran before that PATH change) shows.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    TESSERACT_PATH=/usr/bin/tesseract
# All following relative paths resolve against this directory
# (WORKDIR creates it when it does not exist).
WORKDIR /home/user/app

# Copy only the dependency manifest before installing, so the install layer
# is reused from cache until requirements.txt itself changes.
COPY requirements.txt ./
RUN pip install --upgrade pip \
    && pip install -r requirements.txt
# Download the NLTK "punkt" tokenizer data.
#   - `-d /usr/local/share/nltk_data` stores it in a system-wide directory on
#     NLTK's default search path, so it is found whichever user the container
#     runs as (the downloader otherwise falls back to the build user's home).
#   - The follow-up nltk.data.find() raises LookupError when the data is
#     missing, so a failed download fails the build instead of the app.
#   - Runs before the source copy: it depends only on the installed Python
#     packages, so source-only changes reuse this layer from cache.
RUN python -m nltk.downloader -d /usr/local/share/nltk_data punkt \
    && python -c "import nltk; nltk.data.find('tokenizers/punkt')"

# Copy the application code into the working directory.
COPY . .
# Port the application is expected to listen on (Gradio, per the original
# comment). EXPOSE is documentation only; it does not publish the port.
EXPOSE 7860

# Container health check: an HTTP GET against the app on the exposed port.
#   --start-period gives the app time to start before failures count
#     (60s is a guess -- tune to the real startup time).
#   --timeout bounds a hung request; --interval/--retries match Docker's defaults.
#   curl: --fail turns HTTP errors into a non-zero exit; --silent/--show-error
#     and --output /dev/null keep the page body out of the health log.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl --fail --silent --show-error --output /dev/null http://localhost:7860 || exit 1

# NOTE(review): no USER instruction is visible in this file, so the process
# runs as the image's default user (root for this base image unless the
# platform overrides it) -- consider a dedicated non-root user.
# Start the application (exec form, so python receives signals directly).
CMD ["python", "app.py"]