File size: 2,069 Bytes
2415f43
 
67b29cd
 
 
 
a33bbc9
375dbf3
67b29cd
4f5341e
67b29cd
 
4f5341e
 
 
 
 
 
 
67b29cd
ad9ea09
3227486
 
ad9ea09
3227486
e22b331
a33bbc9
 
 
 
2415f43
 
 
4f5341e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# download_model.py
### --- A. transformer and embedder ---
import os
import shutil
from huggingface_hub import snapshot_download

# Set up paths
MODEL_REPO = "sentence-transformers/all-MiniLM-L6-v2"
MODEL_CACHE_DIR = "/app/model_cache"
HF_CACHE_DIR = os.getenv("HF_HOME", "/home/user/.cache/huggingface")

print("⏳ Downloading the SentenceTransformer model...")

# Ensure the target directory exists BEFORE downloading.
# (The original existence check ran *after* snapshot_download had already
# written into this directory, so it could never have any effect.)
os.makedirs(MODEL_CACHE_DIR, exist_ok=True)

# Download directly into /app/model_cache so the usable model lives there.
# With local_dir set, huggingface_hub materializes the snapshot files in
# local_dir; the deprecated local_dir_use_symlinks flag (ignored since
# huggingface_hub 0.23) has been dropped to silence its deprecation warning.
model_path = snapshot_download(
    repo_id=MODEL_REPO,
    cache_dir=HF_CACHE_DIR,              # Store HF cache in user cache dir
    local_dir=MODEL_CACHE_DIR,           # Place usable model here
)

print("Model path: ", model_path)

# Walk the target directory so build logs show the downloaded snapshot layout.
print("\n📂 LLM Model Structure (Build Level):")
for root, dirs, files in os.walk(MODEL_CACHE_DIR):
    print(f"📁 {root}/")
    for file in files:
        print(f"  📄 {file}")


### --- B. translation modules ---
# Translation models are optionally pre-fetched at build time. They can be
# very large and may blow past build storage quotas on constrained hosts
# (e.g., HF Spaces), so this step is opt-in via the PRELOAD_TRANSLATORS
# environment variable: set it to "1" to enable; anything else skips it.
PRELOAD_TRANSLATORS = os.getenv("PRELOAD_TRANSLATORS", "0")
if PRELOAD_TRANSLATORS != "1":
    # Default path: defer downloads until the models are first used.
    print("ℹ️ Skipping translator pre-download (PRELOAD_TRANSLATORS != '1'). They will lazy-load at runtime.")
else:
    try:
        # Imported lazily so the build does not require transformers
        # unless preloading was explicitly requested.
        from transformers import pipeline
        print("⏬ Pre-downloading Vietnamese–English translator...")
        _ = pipeline("translation", model="VietAI/envit5-translation", src_lang="vi", tgt_lang="en", device=-1)
        print("⏬ Pre-downloading Chinese–English translator...")
        _ = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en", device=-1)
        print("✅ Translators preloaded.")
    except Exception as e:
        # Best-effort: a failed preload must not fail the build.
        print(f"⚠️ Skipping translator preload due to error: {e}")