#!/usr/bin/env python3
"""
Script to pre-download the sentence-transformers model during Docker build.

This helps avoid runtime download issues on Hugging Face Spaces.
"""

import os
import sys

# Directory the model files are downloaded into.
CACHE_DIR = "/code/.cache/huggingface"

# Environment variables that are pointed at the cache directory.
CACHE_ENV_VARS = ("HF_HOME", "TRANSFORMERS_CACHE", "HF_HUB_CACHE")

# Models to try, in order of preference; the first one that loads wins.
MODELS_TO_TRY = (
    "all-MiniLM-L6-v2",  # Default from config
    "sentence-transformers/all-MiniLM-L6-v2",  # Full path
    "sentence-transformers/paraphrase-MiniLM-L6-v2",  # Alternative
)


def configure_cache(cache_dir=CACHE_DIR):
    """Create *cache_dir* and point the Hugging Face cache variables at it.

    Args:
        cache_dir: Directory to create (if missing) and export through every
            variable listed in ``CACHE_ENV_VARS``.

    Returns:
        The cache directory that was configured.

    Raises:
        OSError: If the directory cannot be created.
    """
    os.makedirs(cache_dir, exist_ok=True)
    for var_name in CACHE_ENV_VARS:
        os.environ[var_name] = cache_dir
    return cache_dir


def main():
    """Download the first loadable model into the cache and smoke-test it.

    Each entry of ``MODELS_TO_TRY`` is attempted in order. The first model
    that both loads and encodes a test sentence ends the loop. If every
    candidate fails, the last error is printed and the process exits with
    status 1.
    """
    cache_dir = configure_cache()

    # Import only after the environment is configured: the Hugging Face
    # libraries resolve their cache locations from the environment when they
    # are first imported, so a module-level import would make the variables
    # set above arrive too late to have any effect in this process.
    from sentence_transformers import SentenceTransformer

    print("Downloading sentence-transformers model...")

    last_error = None
    for model_name in MODELS_TO_TRY:
        try:
            print(f"Trying model: {model_name}")
            model = SentenceTransformer(model_name, cache_folder=cache_dir)
            print(f"✅ Successfully downloaded {model_name}")

            # Encode one sentence so a broken download fails here, not later.
            embeddings = model.encode(["This is a test sentence."])
            print(f"✅ Model test successful - embedding shape: {embeddings.shape}")
            break
        except Exception as e:
            # Broad on purpose: any failure just moves on to the next model.
            print(f"❌ Failed to download {model_name}: {e}")
            last_error = e
    else:
        # The loop finished without a break, so no candidate worked.
        print(f"❌ All models failed to download. Last error: {last_error}")
        sys.exit(1)

    print("✅ Model download completed successfully!")


if __name__ == "__main__":
    main()