Spaces:

kerols77
/

grade

Running

App Files Files Community

grade / Dockerfile

kerols77

Update Dockerfile

d6849d3 verified about 2 months ago

raw

history blame contribute delete

3.05 kB

	# Base image: the official full (non-slim) Python 3.9 image.
	# The Debian release is pinned explicitly (bookworm) because the bare
	# `python:3.9` tag floats to whichever Debian release the image maintainers
	# currently build on, which changes the apt packages available to the
	# install steps below from one rebuild to the next.
	FROM python:3.9-bookworm

	# Runtime environment, declared in a single instruction:
	#   PYTHONUNBUFFERED    - unbuffered Python output, for real-time logging
	#   HOME                - /app, so PaddleOCR uses /app/.paddleocr rather than
	#                         the root user's home directory
	#   PADDLEOCR_CACHE_DIR - custom cache directory for PaddleOCR model files
	#                         NOTE(review): confirm the application or PaddleOCR
	#                         actually reads this variable
	#   TRANSFORMERS_CACHE  - custom cache directory for Transformers model files
	ENV PYTHONUNBUFFERED=1 \
	    HOME=/app \
	    PADDLEOCR_CACHE_DIR=/app/.cache/paddleocr \
	    TRANSFORMERS_CACHE=/app/.cache/huggingface

	# Install system dependencies (compiler toolchain, ccache, Git LFS and the
	# shared libraries for OpenGL / GLib), then set up Git LFS.
	#   - /app is created first so that HOME (/app) exists before anything
	#     writes to it.
	#   - libgl1 is installed instead of libgl1-mesa-glx: the latter is only a
	#     transitional package and is not available on newer Debian releases,
	#     which makes `apt-get install` fail there. libgl1 is the package that
	#     ships libGL.so.1.
	#   - An empty /app/.gitconfig is created to satisfy Git LFS requirements
	#     before `git lfs install` runs.
	#   - The apt package lists are removed in the same layer to keep it small.
	RUN mkdir -p /app && \
	    apt-get update && \
	    apt-get install -y --no-install-recommends \
	        build-essential \
	        ccache \
	        git-lfs \
	        libgl1 \
	        libglib2.0-0 && \
	    touch /app/.gitconfig && \
	    git lfs install --force && \
	    rm -rf /var/lib/apt/lists/*

	# Prepare the model download locations. Each directory is created (with any
	# missing parents) and then opened up recursively to mode 777 so that model
	# files can be downloaded into it.
	# NOTE(review): world-writable permissions presumably accommodate a runtime
	# user that differs from the build user - confirm, and tighten if possible.
	RUN set -e; \
	    for model_dir in \
	        /app/.paddleocr \
	        /app/.cache/paddleocr \
	        /app/.cache/huggingface; \
	    do \
	        mkdir -p "$model_dir"; \
	        chmod -R 777 "$model_dir"; \
	    done

	# Set the working directory for the remaining build steps and for the
	# running container.
	WORKDIR /app

	# Upgrade pip and install the pinned Python dependencies.
	# This happens BEFORE the application source is copied: none of the
	# commands below read files from the repository, so keeping them ahead of
	# the COPY lets Docker reuse these large layers when only application code
	# changes.
	# Every pip invocation passes --no-cache-dir so that pip's download cache
	# is not written into an image layer.
	# PaddlePaddle (CPU-only) is installed via the official find-links URL,
	# followed by PaddleOCR.
	RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
	    pip install --no-cache-dir \
	        flask==3.1.0 \
	        opencv-python-headless==4.11.0.86 \
	        numpy==1.23.5 \
	        spacy==3.8.3 \
	        spacy-legacy==3.0.12 \
	        spacy-loggers==1.0.5 \
	        sentence-transformers==3.4.1 \
	        transformers==4.49.0 \
	        torch==2.6.0 \
	        requests==2.32.3 && \
	    pip install --no-cache-dir paddlepaddle==2.6.2 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html && \
	    pip install --no-cache-dir paddleocr==2.9.1

	# Download spaCy's English model ("en_core_web_md").
	RUN python -m spacy download en_core_web_md

	# Copy the repository contents into the container. This is done last among
	# the build steps above so that source edits do not invalidate the
	# dependency layers.
	COPY . /app

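	# NOTE: the pre-loading step below is currently commented out, so it does not run during the build.
	# When enabled, it pre-loads heavy models and tokenizers so they're cached inside the image: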
	# • SentenceTransformer's "all-mpnet-base-v2"
	# • spaCy's "en_core_web_md"
	# • Transformers model and tokenizer "roberta-large-mnli" used by your entailment classifier
	# • PaddleOCR's models (downloaded into /app/.paddleocr)
	#RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-mpnet-base-v2')" && \
	# python -c "import spacy; spacy.load('en_core_web_md')" && \
	# python -c "from transformers import AutoModel, AutoTokenizer; AutoModel.from_pretrained('roberta-large-mnli'); AutoTokenizer.from_pretrained('roberta-large-mnli')" && \
	# python -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=True, lang='en')"

	# Point the Flask CLI at the application entry point.
	ENV FLASK_APP=app.py

	# Document the TCP port the Flask application listens on (the same port
	# passed to `flask run` below).
	EXPOSE 7860/tcp

	# Default container command: start the Flask application, bound to all
	# interfaces on port 7860.
	CMD ["flask", "run", "--host=0.0.0.0", "--port=7860"]