kerols77 committed on
Commit
09b8b7b
·
verified ·
1 Parent(s): e339b6b

Create Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +71 -0
Dockerfile ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Base image: the slim variant of the official Python 3.9 image.
2
+ FROM python:3.9-slim
3
+
4
+ # Disable Python's stdout/stderr buffering so log lines show up immediately.
5
+ ENV PYTHONUNBUFFERED=1
6
+
7
+ # Point HOME at /app so tools that write under ~ (e.g. PaddleOCR's ~/.paddleocr) stay inside /app.
8
+ ENV HOME=/app
9
+ # Cache locations for large model files. NOTE(review): confirm the installed PaddleOCR and Transformers releases actually honor these variable names.
10
+ ENV PADDLEOCR_CACHE_DIR=/app/.cache/paddleocr
11
+ ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
12
+
13
+ # Create /app (so that HOME exists) and install the system packages needed to build and run the app,
14
+ # then initialise Git LFS. libgl1 is used because libgl1-mesa-glx is no longer shipped by newer Debian bases.
15
+ RUN mkdir -p /app && \
16
+ apt-get update && apt-get install -y --no-install-recommends \
17
+ build-essential \
18
+ libgl1 \
19
+ libglib2.0-0 \
20
+ git-lfs \
21
+ ccache && \
22
+ # Create an empty .gitconfig (under HOME) so that "git lfs install" has a config file to write to.
23
+ touch /app/.gitconfig && \
24
+ git lfs install --force && \
25
+ rm -rf /var/lib/apt/lists/*
26
+
27
+ # Pre-create the model/cache directories and make them world-writable (mode 777) so that any runtime user can download model files into them.
28
+ RUN mkdir -p /app/.paddleocr /app/.cache/paddleocr /app/.cache/huggingface && \
29
+ chmod -R 777 /app/.paddleocr /app/.cache/paddleocr /app/.cache/huggingface
30
+
31
+ # Make /app the working directory for the following instructions and for the running container.
32
+ WORKDIR /app
33
+
34
+ # Copy the whole build context into /app. NOTE(review): this precedes the dependency install, so any source change invalidates the layers below.
35
+ COPY . /app
36
+
37
+ # Upgrade pip and install Python dependencies (pip's download cache is disabled throughout to keep the layer small).
38
+ # Note: We install PaddlePaddle (CPU-only) using the official find-links URL, then a PaddleOCR 2.x release, since PaddleOCR 3.x targets PaddlePaddle 3.x.
39
+ RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
40
+ pip install --no-cache-dir \
41
+ flask \
42
+ opencv-python-headless \
43
+ numpy \
44
+ spacy \
45
+ sentence-transformers \
46
+ transformers \
47
+ requests && \
48
+ pip install --no-cache-dir paddlepaddle==2.5.2 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html && \
49
+ pip install --no-cache-dir "paddleocr<3"
50
+
51
+ # Download spaCy's medium English pipeline ("en_core_web_md") at build time.
52
+ RUN python -m spacy download en_core_web_md
53
+
54
+ # Instantiate each heavy model once during the build so its files are fetched and stored in the image:
55
+ # • SentenceTransformer's "all-mpnet-base-v2"
56
+ # • spaCy's "en_core_web_md" (loaded once to check that the download above is usable)
57
+ # • Transformers model and tokenizer "roberta-large-mnli" (per the original note, used by the app's entailment classifier)
58
+ # • PaddleOCR's English models with the angle classifier (expected under /app/.paddleocr, i.e. ~/.paddleocr with HOME=/app)
59
+ RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-mpnet-base-v2')" && \
60
+ python -c "import spacy; spacy.load('en_core_web_md')" && \
61
+ python -c "from transformers import AutoModel, AutoTokenizer; AutoModel.from_pretrained('roberta-large-mnli'); AutoTokenizer.from_pretrained('roberta-large-mnli')" && \
62
+ python -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=True, lang='en')"
63
+
64
+ # Document that the Flask application listens on port 7860 (EXPOSE by itself does not publish the port).
65
+ EXPOSE 7860
66
+
67
+ # Tell the flask command-line tool which file contains the application.
68
+ ENV FLASK_APP=app.py
69
+
70
+ # On container start, run Flask's built-in server on all interfaces, port 7860.
71
+ CMD ["flask", "run", "--host=0.0.0.0", "--port=7860"]