# syntax=docker/dockerfile:1

FROM python:3.10-slim

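# Disable pip caching and point the Hugging Face, NLTK, and matplotlib caches
# at app-local, writable paths so the image runs cleanly as a non-root user
# (e.g., on Hugging Face Spaces).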
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PORT=7860 \
    HF_HOME=/app/.cache/huggingface \
    NLTK_DATA=/app/nltk_data \
    MPLCONFIGDIR=/app/.config/matplotlib

# System deps (build tools and libs for pillow/wordcloud)
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    gcc \
    g++ \
    libjpeg-dev \
    zlib1g-dev \
    libpng-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python deps first (better layer caching)
COPY requirements.txt ./
RUN pip install --upgrade pip && \
    pip install -r requirements.txt

# Prepare writable caches
RUN mkdir -p ${HF_HOME} ${NLTK_DATA} ${MPLCONFIGDIR}

# Copy application code
COPY . .

# Ensure writable permissions for runtime (Spaces/K8s non-root scenarios)
RUN chmod -R 777 /app

# Run the repo's postbuild script (e.g., installing a spaCy model) if present;
# otherwise download en_core_web_md directly
RUN if [ -f postbuild ]; then sh postbuild; else python -m spacy download en_core_web_md; fi

# Pre-download NLTK data to writable dir
RUN python - <<'PY'
import os

import nltk

# Use the writable NLTK_DATA dir configured via ENV above (with a fallback
# so the script also works if the variable is unset).
data_dir = os.environ.get('NLTK_DATA', '/app/nltk_data')
os.makedirs(data_dir, exist_ok=True)
for pkg in ['punkt', 'punkt_tab', 'wordnet', 'averaged_perceptron_tagger']:
    try:
        nltk.download(pkg, download_dir=data_dir)
    except Exception as e:
        print('NLTK download failed for', pkg, e)
PY

# Preload HF transformer models to writable cache
RUN python - <<'PY'
from transformers import pipeline
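# Instantiating each pipeline at build time downloads the model weights and
# tokenizers into HF_HOME, so containers don't fetch them on first request.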
# DistilBERT SST-2
pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')
# RoBERTa Twitter
pipeline('sentiment-analysis', model='cardiffnlp/twitter-roberta-base-sentiment')
# Emotion model
pipeline('sentiment-analysis', model='j-hartmann/emotion-english-distilroberta-base')
PY

# Document the default port; the actual bind address comes from $PORT in the CMD below
EXPOSE 7860

# Start the app using gunicorn (respects $PORT)
CMD ["sh", "-c", "gunicorn -b 0.0.0.0:${PORT:-7860} app:app"]