"""Download model checkpoints from the Hugging Face Hub as listed in a YAML config.

The config is iterated as a sequence of mappings. Each mapping is read for:

* ``model_id``  -- repository id passed to the Hub API (required).
* ``local_dir`` -- destination directory; a null value skips the entry (required key).
* ``allow``     -- optional list of file names to download, each passed verbatim
                   as ``filename`` to ``hf_hub_download``.
* ``deny``      -- optional list of substrings; only consulted when ``allow`` is
                   empty, in which case every repo file containing none of the
                   substrings is downloaded.
"""

import argparse
import os

import yaml
from huggingface_hub import hf_hub_download, list_repo_files


def load_config(config_path):
    """Parse the YAML file at *config_path* and return the loaded object.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file (``None`` for an
        empty document).
    """
    # Explicit encoding so the result does not depend on the platform locale.
    with open(config_path, "r", encoding="utf-8") as file:
        return yaml.safe_load(file)


def _as_list(value):
    """Normalize a YAML value (null, scalar string, or sequence) to a list."""
    if value is None:
        return []
    if isinstance(value, str):
        # A bare string would otherwise be iterated character by character.
        return [value]
    return list(value)


def _download_file(model_id, filename, local_dir):
    """Download a single file of *model_id* into *local_dir*."""
    # NOTE(review): `local_dir_use_symlinks` appears to be deprecated in recent
    # huggingface_hub releases -- kept for compatibility; confirm before removing.
    hf_hub_download(
        repo_id=model_id,
        filename=filename,
        local_dir=local_dir,
        local_dir_use_symlinks=False,
    )


def download_model(model_config):
    """Download the files of one configured model into its local directory.

    Args:
        model_config: Mapping with the keys ``model_id`` and ``local_dir`` and
            the optional keys ``allow`` and ``deny`` (see module docstring).

    Raises:
        KeyError: If ``model_id`` or ``local_dir`` is missing from the mapping.
    """
    model_id = model_config["model_id"]
    local_dir = model_config["local_dir"]

    if local_dir is None:
        print(f"Skipping download for {model_id}: local_dir is null")
        return

    os.makedirs(local_dir, exist_ok=True)

    # `allow: null` / `deny: null` and scalar strings are all accepted.
    allow_patterns = _as_list(model_config.get("allow"))
    deny_patterns = _as_list(model_config.get("deny"))

    if allow_patterns:
        # An explicit allow list takes precedence; the deny list is not consulted.
        for file in allow_patterns:
            _download_file(model_id, file, local_dir)
        return

    print(f"No allow patterns specified for {model_id}. Attempting to download all files except those in deny list.")
    for file in list_repo_files(repo_id=model_id):
        # Deny entries are matched as plain substrings of the repo file path.
        if any(deny_pattern in file for deny_pattern in deny_patterns):
            continue
        _download_file(model_id, file, local_dir)


def main():
    """Parse command-line arguments and download every model listed in the config."""
    parser = argparse.ArgumentParser(description="Download model checkpoints from Hugging Face Hub")
    parser.add_argument(
        "--config_path",
        type=str,
        default="configs/model_ckpts.yaml",
        help="Path to the configuration YAML file",
    )
    args = parser.parse_args()

    # An empty YAML document loads as None; treat it as "no models configured".
    config = load_config(args.config_path) or []

    for model_config in config:
        print(f"Processing {model_config['model_id']} (local_dir: {model_config['local_dir']})")
        download_model(model_config)


if __name__ == "__main__":
    main()