# syntax=docker/dockerfile:1.6
# PRMSChallenge / Dockerfile
# Vineela Gampa
# fixes for front end
# ec85693 unverified
# Base image. Override at build time with --build-arg PY_BASE=<image>:<tag>;
# the default is the slim Debian bullseye build of Python 3.9.
ARG PY_BASE=python:3.9-slim-bullseye
FROM $PY_BASE
# Python / pip behaviour: no .pyc files, unbuffered stdout/stderr, no pip cache.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# Threading knobs: disable tokenizers parallelism and limit OpenMP to one thread.
ENV TOKENIZERS_PARALLELISM=false \
    OMP_NUM_THREADS=1

# Model cache location; the directory itself is created later in this file.
ENV TRANSFORMERS_CACHE=/cache/hf
# Cache buster: change DEPS_REFRESH (here or via --build-arg) to force every
# following layer to rebuild after tweaking dependencies.
ARG DEPS_REFRESH=2025-09-07-06
# The value is also exported into the image environment.
ENV DEPS_REFRESH=${DEPS_REFRESH}
RUN echo "CACHEBUSTER=${DEPS_REFRESH}"
# System packages: tesseract OCR (English + orientation/script data), the
# shared libraries the opencv wheels need, and curl/CA certs for HTTPS.
# The apt lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libgl1 \
        libglib2.0-0 \
        tesseract-ocr \
        tesseract-ocr-eng \
        tesseract-ocr-osd \
    && rm -rf /var/lib/apt/lists/*

# Tell tesseract where its language data lives.
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00/tessdata
WORKDIR /app

# Copy only the dependency manifest first so the install layer below stays
# cached until requirements.txt itself changes.
COPY requirements.txt ./

# Upgrade the packaging toolchain, then install the Python dependencies.
RUN python -m pip install --upgrade pip setuptools wheel \
    && python -m pip install --no-cache-dir -r requirements.txt
# spaCy English model pinned to the 3.2.0 release wheel (for spaCy 3.2.x).
# --no-deps keeps pip from pulling in or upgrading any other package.
RUN python -m pip install --no-deps \
    "en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl"
# Robust version dump: versions are read from installed distribution metadata,
# so no package is assumed to expose a __version__ attribute. The step only
# prints to the build log; a missing package is reported, not treated as an error.
RUN python - <<'PY'
import importlib.util
import sys

try:
    import importlib.metadata as md
except ImportError:
    # Backport for interpreters older than 3.8 (not used with the default base).
    import importlib_metadata as md


def v(name):
    """Return the installed version of distribution `name`, or "not-installed"."""
    try:
        return md.version(name)
    except md.PackageNotFoundError:
        return "not-installed"


print("python:", sys.version.split()[0])
for name in ("pydantic", "typing-extensions", "spacy", "thinc", "en-core-web-sm"):
    print(f"{name}:", v(name))

# importlib.util.find_spec replaces pkgutil.find_loader, which is deprecated
# since Python 3.12 and slated for removal, so this keeps working if PY_BASE
# is overridden with a newer interpreter.
print("has en_core_web_sm:", importlib.util.find_spec("en_core_web_sm") is not None)
PY
# Application source: copy the build context into the current WORKDIR.
# This comes after the dependency layers so code edits do not reinstall deps.
COPY . ./
# Writable caches (world-writable, presumably so the app still works when the
# container runs as a non-root UID — confirm against the deployment target).
# A numeric 777 clears the sticky bit on /tmp, so it is restored with 1777
# afterwards; without it any user could delete other users' temp files.
RUN mkdir -p /cache/hf /tmp \
    && chmod -R 777 /cache /tmp \
    && chmod 1777 /tmp

# Writable directory that holds the application database (see DB_PATH below).
RUN mkdir -p /data && chmod -R 777 /data
# If you have a starter DB in the repo, uncomment the next line to seed it:
#COPY app.db /data/app.db
ENV DB_DIR=/data \
    DB_PATH=/data/app.db

# (optional) expose as a volume so you can mount from host if you want persistence.
# Declared after the directory is created so the build-time contents are kept.
VOLUME ["/data"]
# PORT is intentionally not set here; both commands below fall back to 7860
# when the runtime does not provide it.
#ENV PORT=8000
EXPOSE 7860

# Liveness probe: poll the local health endpoint every 30s; curl -f turns
# HTTP error statuses into a non-zero exit, which marks the probe as failed.
HEALTHCHECK --interval=30s --timeout=10s --retries=3 \
    CMD curl -fsS "http://127.0.0.1:${PORT:-7860}/api/health/" || exit 1
# Bind to $PORT provided by HF (falling back to 7860); include proxy headers.
# A shell wrapper is needed for the ${PORT:-7860} expansion; `exec` then replaces
# that shell with uvicorn so the server receives stop signals (SIGTERM) directly
# and can shut down gracefully instead of waiting to be killed.
CMD ["sh","-c","exec uvicorn backend:app --host 0.0.0.0 --port ${PORT:-7860} --proxy-headers --forwarded-allow-ips='*'"]