Spaces:
Running
Running
File size: 2,402 Bytes
aa1694d c46fce7 cc07d68 aa1694d c46fce7 d81e914 c46fce7 aa1694d cc07d68 c46fce7 aa1694d d81e914 cc07d68 c46fce7 cc07d68 48500f7 c46fce7 d81e914 c46fce7 d81e914 c46fce7 d81e914 cc07d68 c46fce7 aa1694d c46fce7 aa1694d ec85693 d84ecbc c46fce7 aa1694d 732e860 aa1694d ec85693 e9537a4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# syntax=docker/dockerfile:1.6
# Base image. The default can be swapped at build time by passing a
# different PY_BASE build argument.
ARG PY_BASE=python:3.9-slim-bullseye
FROM $PY_BASE
# Python / pip behaviour: no .pyc files, unbuffered stdout/stderr (logs show
# up immediately), and no pip download cache kept in the image.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1
# ML library settings: tokenizers parallelism off, a single OpenMP thread,
# and the Hugging Face transformers cache pointed at /cache/hf.
ENV TOKENIZERS_PARALLELISM=false \
    OMP_NUM_THREADS=1 \
    TRANSFORMERS_CACHE=/cache/hf
# Cache buster: bump DEPS_REFRESH (edit the default or pass it as a build
# argument) to force the following layers to rebuild after changing deps.
# The value is also exported into the image environment and echoed to the
# build log.
ARG DEPS_REFRESH=2025-09-07-06
ENV DEPS_REFRESH=${DEPS_REFRESH}
RUN echo "CACHEBUSTER=$DEPS_REFRESH"
# OS packages (installed and cleaned up in a single layer):
#   ca-certificates, curl  - HTTPS support; curl is also used by the HEALTHCHECK
#   libgl1, libglib2.0-0   - shared libraries required by the opencv wheels
#   tesseract-ocr*         - OCR engine with English and OSD data
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      curl \
      libgl1 \
      libglib2.0-0 \
      tesseract-ocr \
      tesseract-ocr-eng \
      tesseract-ocr-osd \
 && rm -rf /var/lib/apt/lists/*
# Directory tesseract should read its traineddata files from.
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00/tessdata
WORKDIR /app
# Copy only the dependency manifest first so the install layer below is
# reused from cache until requirements.txt itself changes.
COPY requirements.txt ./
# Upgrade the packaging toolchain, then install the Python dependencies.
# `set -e` aborts the step as soon as either command fails.
RUN <<EOF
set -e
python -m pip install --upgrade pip setuptools wheel
pip install --no-cache-dir -r requirements.txt
EOF
# spaCy English model, pinned to the 3.2.0 wheel to match spaCy 3.2.x.
# --no-deps stops pip from installing or upgrading any other package.
RUN python -m pip install --no-deps \
      "en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl"
# Build-time diagnostic: print the Python version and the installed versions
# of a few key distributions to the build log. Versions come from package
# metadata (no __version__ attribute assumptions); a missing distribution is
# reported as "not-installed" instead of failing the build.
RUN python - <<'PY'
import importlib.util
import sys

try:
    import importlib.metadata as md
except ImportError:
    # Backport for Python < 3.8 (not needed with the default base image).
    import importlib_metadata as md


def v(name):
    """Return the installed version of distribution `name`, or "not-installed"."""
    try:
        return md.version(name)
    except md.PackageNotFoundError:
        return "not-installed"


print("python:", sys.version.split()[0])
for name in ("pydantic", "typing-extensions", "spacy", "thinc", "en-core-web-sm"):
    print(f"{name}:", v(name))
# find_spec returns None when the module cannot be located. It is used instead
# of pkgutil.find_loader, which is deprecated since Python 3.12, so this step
# keeps working if PY_BASE is switched to a newer Python.
print("has en_core_web_sm:", importlib.util.find_spec("en_core_web_sm") is not None)
PY
# App code (copied after the dependency layers so source edits do not
# trigger a reinstall).
COPY . .
# Writable caches: /cache (including /cache/hf) and /tmp are opened up so the
# app can write to them whichever UID the container runs as.
# The final chmod restores the sticky bit on /tmp: a plain 777 clears it,
# which would let any user delete or rename other users' temp files.
RUN mkdir -p /cache/hf /tmp \
 && chmod -R 777 /cache /tmp \
 && chmod 1777 /tmp
# Database directory, created world-writable so the app can create and
# modify its DB file there.
RUN mkdir -p /data && chmod -R 777 /data
# If you have a starter DB in the repo, uncomment the next line to seed it:
#COPY app.db /data/app.db
# NOTE(review): a file copied here is added after the chmod above and keeps
# its own mode; confirm the app can write to it under the runtime UID.
# Where the DB lives; presumably read by the application (verify in backend).
ENV DB_DIR=/data \
    DB_PATH=/data/app.db
# (optional) expose as a volume so you can mount from host if you want persistence
VOLUME ["/data"]
# PORT is left unset on purpose; both the health check and the start
# command fall back to 7860 when it is not provided.
#ENV PORT=8000
EXPOSE 7860
# Probe the app's health endpoint over loopback every 30s (10s timeout,
# 3 retries). Any curl failure is normalised to exit status 1.
HEALTHCHECK --interval=30s --timeout=10s --retries=3 \
    CMD curl --fail --silent --show-error "http://127.0.0.1:${PORT:-7860}/api/health/" || exit 1
# Bind to $PORT provided by HF (falls back to 7860) and honour proxy headers
# from any forwarding address. The shell is needed only to expand $PORT;
# `exec` then replaces it with uvicorn so the server runs as PID 1 and
# receives SIGTERM directly on container stop.
CMD ["sh","-c","exec uvicorn backend:app --host 0.0.0.0 --port ${PORT:-7860} --proxy-headers --forwarded-allow-ips='*'"]
|