#!/usr/bin/env python3
"""
Pre-download the sentence-transformers model during the Docker build.

This helps avoid runtime download issues on Hugging Face Spaces.
"""

import os
import sys
from sentence_transformers import SentenceTransformer

def main():
    """Download a sentence-transformers model into the local cache and smoke-test it.

    Candidate model names are tried in order. The first one that loads and
    encodes a sample sentence without raising ends the search. If every
    candidate raises, the last error is printed and the process exits with
    status 1.
    """
    # Cache location used for the download; created up front if missing.
    cache_dir = "/code/.cache/huggingface"
    os.makedirs(cache_dir, exist_ok=True)

    # Point the Hugging Face cache variables at that same directory.
    # NOTE(review): these are assigned after sentence_transformers was
    # imported at the top of the file; libraries that read them at import
    # time may not pick them up in this process - confirm. The explicit
    # cache_folder argument below is what directs the download here.
    for env_var in ("HF_HOME", "TRANSFORMERS_CACHE", "HF_HUB_CACHE"):
        os.environ[env_var] = cache_dir

    print("Downloading sentence-transformers model...")

    # Candidates in order of preference.
    candidates = (
        "all-MiniLM-L6-v2",  # Default from config
        "sentence-transformers/all-MiniLM-L6-v2",  # Full path
        "sentence-transformers/paraphrase-MiniLM-L6-v2",  # Alternative
    )

    failure = None
    for model_name in candidates:
        try:
            print(f"Trying model: {model_name}")
            loaded = SentenceTransformer(model_name, cache_folder=cache_dir)
            print(f"✅ Successfully downloaded {model_name}")

            # Smoke test: encode one sentence so a broken model is caught now.
            vectors = loaded.encode(["This is a test sentence."])
            print(f"✅ Model test successful - embedding shape: {vectors.shape}")
        except Exception as exc:
            # Any failure (load or encode) moves on to the next candidate.
            print(f"❌ Failed to download {model_name}: {exc}")
            failure = exc
        else:
            # First working candidate wins; skip the remaining ones.
            break
    else:
        # Loop finished without a break: no candidate worked.
        print(f"❌ All models failed to download. Last error: {failure}")
        sys.exit(1)

    print("✅ Model download completed successfully!")

# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()