Spaces:
Running
Running
# syntax=docker/dockerfile:1.6

# Base image; override with --build-arg PY_BASE=<image> if needed.
ARG PY_BASE=python:3.9-slim-bullseye
FROM ${PY_BASE}

# Environment shared by all later build steps and by the running container:
#   - Python: no .pyc files, unbuffered stdout/stderr
#   - pip: no download cache kept in the image
#   - tokenizers / OpenMP: parallelism disabled / limited to one thread
#   - transformers: model cache stored under /cache/hf
ENV OMP_NUM_THREADS=1 \
    PIP_NO_CACHE_DIR=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    TOKENIZERS_PARALLELISM=false \
    TRANSFORMERS_CACHE=/cache/hf
# Cache buster: bump DEPS_REFRESH (here or via --build-arg) to force the
# layers below this point to be rebuilt after a dependency tweak.
ARG DEPS_REFRESH=2025-09-07-06
# The value is also exported into the image environment.
ENV DEPS_REFRESH=${DEPS_REFRESH}
RUN echo "CACHEBUSTER=$DEPS_REFRESH"
# OS packages, one per line and sorted for easy diffing:
#   - ca-certificates, curl  : TLS roots and the HTTP client used by HEALTHCHECK
#   - libgl1, libglib2.0-0   : shared libraries for the opencv wheels
#   - tesseract-ocr(-eng/-osd): OCR engine plus English and OSD data
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      curl \
      libgl1 \
      libglib2.0-0 \
      tesseract-ocr \
      tesseract-ocr-eng \
      tesseract-ocr-osd \
 && rm -rf /var/lib/apt/lists/*

# Location of the tesseract language data.
# NOTE(review): this path carries a tesseract version component ("4.00");
# presumably it must be revisited if PY_BASE changes distro release — verify.
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00/tessdata
WORKDIR /app

# Copy only the dependency manifest first so the install layer below stays
# cached until requirements.txt itself changes.
COPY requirements.txt ./

# Refresh the packaging toolchain, then install the pinned application deps.
RUN python -m pip install --upgrade pip setuptools wheel \
 && python -m pip install --no-cache-dir -r requirements.txt
# spaCy English model, pinned to the 3.2.0 wheel to match spaCy 3.2.x.
# --no-deps keeps pip from pulling in or upgrading any other package.
RUN python -m pip install --no-deps \
      "en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl"
# Build-time diagnostic: print the versions of key packages into the build log.
# Versions are read from installed distribution metadata, so no package needs
# to expose a __version__ attribute.
RUN python - <<'PY'
import importlib.util
import sys

try:
    import importlib.metadata as md
except ImportError:
    import importlib_metadata as md  # backport for Python < 3.8 (not used here)


def v(name):
    """Return the installed version of distribution `name`, or "not-installed"."""
    try:
        return md.version(name)
    except md.PackageNotFoundError:
        return "not-installed"


print("python:", sys.version.split()[0])
for name in ("pydantic", "typing-extensions", "spacy", "thinc", "en-core-web-sm"):
    print(f"{name}:", v(name))
# importlib.util.find_spec replaces pkgutil.find_loader, which is deprecated
# since Python 3.12; this keeps the step working if PY_BASE is overridden
# with a newer interpreter.
print("has en_core_web_sm:", importlib.util.find_spec("en_core_web_sm") is not None)
PY
# App code (copied after the dependency layers so code edits do not
# invalidate the installed packages).
COPY . .

# Writable caches: /cache/hf is the TRANSFORMERS_CACHE location and must be
# writable by whatever user the container runs as.
# A plain `chmod 777 /tmp` would clear the sticky bit, letting any user delete
# other users' temp files, so /tmp is reset to the conventional 1777 afterwards.
RUN mkdir -p /cache/hf /tmp \
 && chmod -R 777 /cache /tmp \
 && chmod 1777 /tmp
# Database directory, world-writable so the app can create and update the DB
# regardless of the runtime user.
RUN mkdir -p /data && chmod -R 777 /data

# If you have a starter DB in the repo, uncomment the COPY below to seed it:
#COPY app.db /data/app.db

# Where the application should look for its database.
ENV DB_DIR=/data \
    DB_PATH=/data/app.db

# (optional) Declared as a volume so it can be mounted from the host for
# persistence. Kept after the mkdir/chmod above so those changes are retained.
VOLUME ["/data"]
#ENV PORT=8000
# Default listening port; both commands below fall back to it when PORT is unset.
EXPOSE 7860

# Liveness probe: hit the app's health endpoint on the port actually in use.
HEALTHCHECK --interval=30s --timeout=10s --retries=3 \
  CMD curl -fsS "http://127.0.0.1:${PORT:-7860}/api/health/" || exit 1

# Bind to the platform-provided $PORT and honour proxy headers.
# A shell is needed to expand ${PORT:-7860}; `exec` then replaces that shell
# with uvicorn so the server runs as PID 1 and receives SIGTERM directly on
# container stop, instead of the signal stopping at the wrapper shell.
CMD ["sh","-c","exec uvicorn backend:app --host 0.0.0.0 --port ${PORT:-7860} --proxy-headers --forwarded-allow-ips='*'"]