Spaces:
Running
Running
File size: 1,824 Bytes
3ad6f3d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
#!/usr/bin/env python3
"""
Pre-download the sentence-transformers model during the Docker build.

This helps avoid runtime download issues on Hugging Face Spaces.
"""
import os
import sys
from sentence_transformers import SentenceTransformer
def main():
    """Pre-download a sentence-transformers model into the local cache.

    Creates the cache directory, points the Hugging Face cache environment
    variables at it, then tries each candidate model name in order.  The
    first model that both loads and encodes a test sentence wins and the
    remaining candidates are skipped.  Progress is printed to stdout.

    Raises:
        SystemExit: with exit code 1 when every candidate model fails.
    """
    # Status glyphs are spelled as named escapes so the source file stays
    # pure ASCII and the literals cannot be corrupted by a re-encoding.
    ok_mark = "\N{WHITE HEAVY CHECK MARK}"
    fail_mark = "\N{CROSS MARK}"

    # Set cache directory
    cache_dir = "/code/.cache/huggingface"
    os.makedirs(cache_dir, exist_ok=True)

    # Set environment variables
    # NOTE(review): these are assigned after `sentence_transformers` has
    # already been imported at module top; confirm the libraries read them
    # lazily.  The download below is handed `cache_dir` explicitly via
    # `cache_folder`.
    for env_var in ("HF_HOME", "TRANSFORMERS_CACHE", "HF_HUB_CACHE"):
        os.environ[env_var] = cache_dir

    print("Downloading sentence-transformers model...")

    # List of models to try in order of preference
    models_to_try = [
        "all-MiniLM-L6-v2",  # Default from config
        "sentence-transformers/all-MiniLM-L6-v2",  # Full path
        "sentence-transformers/paraphrase-MiniLM-L6-v2",  # Alternative
    ]

    last_error = None
    for model_name in models_to_try:
        # Broad catch is deliberate: any failure for one candidate (network,
        # missing repo, load error) should fall through to the next one.
        try:
            print(f"Trying model: {model_name}")
            model = SentenceTransformer(model_name, cache_folder=cache_dir)
            print(f"{ok_mark} Successfully downloaded {model_name}")

            # Test the model
            test_text = ["This is a test sentence."]
            embeddings = model.encode(test_text)
            print(
                f"{ok_mark} Model test successful - "
                f"embedding shape: {embeddings.shape}"
            )
            break
        except Exception as e:
            print(f"{fail_mark} Failed to download {model_name}: {e}")
            last_error = e
    else:
        # Loop finished without `break`: no candidate succeeded.
        print(
            f"{fail_mark} All models failed to download. "
            f"Last error: {last_error}"
        )
        sys.exit(1)

    print(f"{ok_mark} Model download completed successfully!")
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()