#!/usr/bin/env python3
"""
Download script for DeepCoder model.

Downloads and caches the model for faster container startup.
Configuration comes from environment variables:
    MODEL_NAME            -- Hugging Face repo id (default: "ai/deepcoder-preview")
    HUGGINGFACE_HUB_CACHE -- local cache directory (default: "/app/cache")
"""
import logging
import os
import sys

from huggingface_hub import snapshot_download
from transformers import AutoTokenizer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

MODEL_NAME = os.getenv("MODEL_NAME", "ai/deepcoder-preview")
CACHE_DIR = os.getenv("HUGGINGFACE_HUB_CACHE", "/app/cache")


def download_model() -> bool:
    """Download the model snapshot and verify it by loading the tokenizer.

    Returns:
        True on success, False if any step of the download/verification
        raised an exception (the error is logged with its traceback).
    """
    try:
        logger.info(f"Downloading model: {MODEL_NAME}")

        # Fetch all repo files into the local cache. Note: resume_download
        # is deprecated in huggingface_hub (downloads always resume), so it
        # is intentionally not passed here.
        snapshot_download(
            repo_id=MODEL_NAME,
            cache_dir=CACHE_DIR,
        )

        # Verify the download by actually loading the tokenizer from cache.
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            cache_dir=CACHE_DIR,
            trust_remote_code=True,
        )

        logger.info("Model downloaded successfully")
        logger.info(f"Vocab size: {tokenizer.vocab_size}")
        logger.info(f"Cache directory: {CACHE_DIR}")
        return True

    except Exception:
        # Top-level boundary for this best-effort script: log the full
        # traceback (logger.exception) instead of just str(e), then signal
        # failure to the caller via the return value.
        logger.exception("Error downloading model")
        return False


if __name__ == "__main__":
    success = download_model()
    if not success:
        # sys.exit is the portable way to set the exit status; the bare
        # exit() builtin is only injected by the `site` module.
        sys.exit(1)
    logger.info("Download complete!")