Spaces:
Running
Running
File size: 1,138 Bytes
380fa4c 3fbb6a1 be51358 3fbb6a1 937d5fa 3fbb6a1 43b8a1d 937d5fa be51358 32e9a12 d476ac9 380fa4c be51358 32e9a12 3fbb6a1 380fa4c 937d5fa 3fbb6a1 32e9a12 937d5fa 3fbb6a1 32e9a12 3fbb6a1 32e9a12 937d5fa 3fbb6a1 32e9a12 be51358 380fa4c 32e9a12 380fa4c 3fbb6a1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# Base image: the slim variant of the Python 3.10 image, chosen to keep the
# starting layer small.
FROM python:3.10-slim
# Install system-level dependencies (OCR engine, graphics/X11 libraries,
# PDF and media tools, build toolchain, curl for the health check).
#
# --no-install-recommends keeps optional "recommended" packages out of the
# image; ca-certificates is listed explicitly so HTTPS access from curl/git
# does not depend on a recommended package being pulled in.
# NOTE(review): if the application relied on a package that used to arrive
# only as a recommendation (e.g. poppler-data for CJK PDFs), add it to this
# list explicitly.
#
# Packages are sorted alphabetically for easier diffs, and the apt package
# lists are removed in the same layer so they never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    curl \
    ffmpeg \
    git \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    poppler-utils \
    tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*
# Make the tesseract binary reachable under /usr/local/bin as well, then print
# its version so the build stops here if the OCR engine cannot be executed.
RUN ln -s /usr/bin/tesseract /usr/local/bin/tesseract \
    && tesseract --version
# Runtime environment variables, grouped in a single instruction.
#   PYTHONUNBUFFERED=1               - Python writes stdout/stderr without buffering
#   PIP_NO_CACHE_DIR=1               - pip does not keep a download/wheel cache
#   HF_HUB_DISABLE_SYMLINKS_WARNING  - silences the Hugging Face Hub symlink warning
#   LANG / LC_ALL                    - UTF-8 locale for text handling
#   TESSERACT_PATH                   - full path of the tesseract executable
#                                      (presumably read by the application; verify in app code)
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    TESSERACT_PATH=/usr/bin/tesseract
# PATH is deliberately left unchanged: TESSERACT_PATH names the executable
# itself, not a directory, so prepending it to PATH would add an entry that can
# never resolve a command. The binary's directory (/usr/bin) is already on PATH.
# Application directory; relative paths in the instructions that follow
# (COPY destinations, RUN, CMD) are resolved against it.
WORKDIR /home/user/app
# Python dependencies: only the requirements manifest is copied at this point,
# so this layer is rebuilt when requirements.txt changes rather than on every
# source edit. pip itself is upgraded first.
COPY requirements.txt .
RUN pip install --upgrade pip \
    && pip install -r requirements.txt
# Download the NLTK "punkt" tokenizer data BEFORE the application source is
# copied: this step does not depend on the source tree, so placing it first
# keeps the layer cached across source edits instead of re-downloading the
# data on every rebuild.
# (Assumes nltk is installed by requirements.txt - verify.)
RUN python -m nltk.downloader punkt
# Copy application code last, since it is the most frequently changing input.
COPY . .
# Document the port the application listens on (Gradio).
EXPOSE 7860
# Container health check: probe the web UI over HTTP.
# Explicit timings are given instead of relying on defaults; the start period
# gives the application time to boot before failed probes count towards the
# retry limit. --silent/--show-error keep curl's progress meter out of the
# health log while still reporting errors.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl --fail --silent --show-error http://localhost:7860 || exit 1
# Start the application (exec form, so python receives signals directly).
CMD ["python", "app.py"]
|