# download_model.py
### --- A. transformer and embedder ---
import os
import shutil
from huggingface_hub import snapshot_download
# Set up paths
MODEL_REPO = "sentence-transformers/all-MiniLM-L6-v2"
MODEL_CACHE_DIR = "/app/model_cache"
HF_CACHE_DIR = os.getenv("HF_HOME", "/home/user/.cache/huggingface")
print("⏳ Downloading the SentenceTransformer model...")
# Download directly into /app/model_cache to avoid duplicating files from HF cache
model_path = snapshot_download(
    repo_id=MODEL_REPO,
    cache_dir=HF_CACHE_DIR,        # Store HF cache in user cache dir
    local_dir=MODEL_CACHE_DIR,     # Place usable model here
    local_dir_use_symlinks=False,  # Copy real files into local_dir (deprecated and ignored on recent huggingface_hub, which always copies)
)
print("Model path: ", model_path)
os.makedirs(MODEL_CACHE_DIR, exist_ok=True)  # Ensure the target dir exists (snapshot_download normally creates it)
# Verify the downloaded snapshot's structure
print("\n📂 Embedding Model Structure (Build Level):")
for root, dirs, files in os.walk(MODEL_CACHE_DIR):
    print(f"📁 {root}/")
    for file in files:
        print(f" 📄 {file}")
### --- B. translation modules ---
# Optional pre-download of translation models. These can be very large and
# may exceed build storage limits on constrained environments (e.g., HF Spaces).
# Control with env var PRELOAD_TRANSLATORS ("1" to enable; default: disabled).
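# For example, to enable the preload at build time (hypothetical Dockerfile line):
#   ENV PRELOAD_TRANSLATORS=1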
PRELOAD_TRANSLATORS = os.getenv("PRELOAD_TRANSLATORS", "0")
if PRELOAD_TRANSLATORS == "1":
    try:
        from transformers import pipeline
        print("⏬ Pre-downloading Vietnamese–English translator...")
        _ = pipeline("translation", model="VietAI/envit5-translation", src_lang="vi", tgt_lang="en", device=-1)
        print("⏬ Pre-downloading Chinese–English translator...")
        _ = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en", device=-1)
        print("✅ Translators preloaded.")
    except Exception as e:
        print(f"⚠️ Skipping translator preload due to error: {e}")
else:
    print("ℹ️ Skipping translator pre-download (PRELOAD_TRANSLATORS != '1'). They will lazy-load at runtime.")