Spaces:

kerols77
/

grade

Sleeping

App Files Files Community

kerols77 committed on Apr 21

Commit

09b8b7b

verified ·

1 Parent(s): e339b6b

Create Dockerfile

Browse files

Files changed (1) hide show

Dockerfile +71 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,71 @@

+# Base image: the official slim Python 3.9 runtime.
+FROM python:3.9-slim
+# Runtime environment, declared in a single instruction:
+#   PYTHONUNBUFFERED    - unbuffered Python output for real-time logging
+#   HOME                - /app, so that PaddleOCR uses /app/.paddleocr instead of the root directory
+#   PADDLEOCR_CACHE_DIR - intended cache location for PaddleOCR's large model files
+#   TRANSFORMERS_CACHE  - cache location for Transformers' large model files
+ENV PYTHONUNBUFFERED=1 \
+    HOME=/app \
+    PADDLEOCR_CACHE_DIR=/app/.cache/paddleocr \
+    TRANSFORMERS_CACHE=/app/.cache/huggingface
+# Create /app (so that HOME exists), install the system packages the app needs
+# (including Git LFS and ccache), then initialise Git LFS.
+#   - libgl1 is used instead of libgl1-mesa-glx: the latter is only a transitional
+#     package and is not available on newer Debian releases, and the floating
+#     python:3.9-slim tag does not pin the Debian release it is built on.
+#   - An empty /app/.gitconfig is created first to satisfy Git LFS, which writes
+#     its configuration into the user-level Git config during `git lfs install`.
+#   - The apt package lists are removed in the same layer to keep the image small.
+RUN mkdir -p /app && \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        ccache \
+        git-lfs \
+        libgl1 \
+        libglib2.0-0 && \
+    touch /app/.gitconfig && \
+    git lfs install --force && \
+    rm -rf /var/lib/apt/lists/*
+# Pre-create the model/cache directories and open their permissions fully so that
+# model files can be downloaded into them.
+# $model_dirs is left unquoted on purpose: the shell must split it into three paths.
+RUN model_dirs="/app/.paddleocr /app/.cache/paddleocr /app/.cache/huggingface" && \
+    mkdir -p $model_dirs && \
+    chmod -R 777 $model_dirs
+# Set the working directory for the container.
+WORKDIR /app
+# Upgrade the packaging tools, then install the Python dependencies.
+# PaddlePaddle (CPU-only) is installed from the official find-links URL, then PaddleOCR.
+# Nothing in this step reads files from the repository, so it runs before the
+# source is copied in and stays cached when only application code changes.
+# NOTE(review): paddleocr is held on the 2.x line to match the pinned
+# paddlepaddle==2.5.2 and the /app/.paddleocr model location used below; confirm
+# the exact version the app was developed against and pin it precisely.
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
+    pip install --no-cache-dir \
+      flask \
+      opencv-python-headless \
+      numpy \
+      spacy \
+      sentence-transformers \
+      transformers \
+      requests && \
+    pip install --no-cache-dir paddlepaddle==2.5.2 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html && \
+    pip install --no-cache-dir "paddleocr<3"
+# Download spaCy's English model ("en_core_web_md").
+RUN python -m spacy download en_core_web_md
+# Pre-load heavy models and tokenizers so they're cached inside the image:
+#   • SentenceTransformer's "all-mpnet-base-v2"
+#   • spaCy's "en_core_web_md"
+#   • Transformers model and tokenizer "roberta-large-mnli" used by the entailment classifier
+#   • PaddleOCR's models (downloaded into /app/.paddleocr)
+# Files and sub-directories created by these downloads belong to the build user,
+# so the cache directories are re-opened afterwards. This happens in the same
+# layer as the downloads so the model files are not stored twice in the image.
+RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-mpnet-base-v2')" && \
+    python -c "import spacy; spacy.load('en_core_web_md')" && \
+    python -c "from transformers import AutoModel, AutoTokenizer; AutoModel.from_pretrained('roberta-large-mnli'); AutoTokenizer.from_pretrained('roberta-large-mnli')" && \
+    python -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=True, lang='en')" && \
+    chmod -R a+rwX /app/.paddleocr /app/.cache/paddleocr /app/.cache/huggingface
+# Copy the repository contents into the container last: a source-only change then
+# rebuilds just this layer instead of reinstalling dependencies and re-downloading models.
+COPY . /app
+# Tell the `flask` CLI which module holds the application.
+ENV FLASK_APP=app.py
+# Document the port the Flask application listens on (7860).
+EXPOSE 7860
+# Container start command: serve the Flask app on all interfaces, port 7860.
+CMD ["flask", "run", "--host=0.0.0.0", "--port=7860"]