#!/usr/bin/env python3
"""
Upload the trained multi-head SigLIP2 classifier to the Hugging Face Hub (private).

This script creates/updates the repo `fal/multihead_cls` and pushes:
- model weights: checkpoints/multi_head_siglip2_classifier.pth
- task configuration: task_config.json (from checkpoints/ or the script directory)
- full training checkpoint: checkpoints/training_checkpoint.pth (optional)
- processor folder: checkpoints/siglip2_processor/
- README.md with usage

Auth: set the HUGGINGFACE_TOKEN environment variable or run `huggingface-cli login`.
"""

import os
import shutil
from pathlib import Path

from huggingface_hub import HfFolder, create_repo, upload_folder

REPO_ID = "fal/multihead_cls"

def ensure_logged_in() -> str:
    """Return a Hub token from the environment or the local HF cache, or raise."""
    # Prefer an explicit env var; fall back to the token cached by `huggingface-cli login`.
    token = os.getenv("HUGGINGFACE_TOKEN") or HfFolder.get_token()
    if not token:
        raise RuntimeError(
            "No Hugging Face token found. Set HUGGINGFACE_TOKEN or run `huggingface-cli login`."
        )
    return token


def prepare_staging_dir() -> Path:
    """Collect the artifacts to publish into a fresh hf_export/ staging directory."""
    root = Path(__file__).parent
    ckpt_dir = root / "checkpoints"
    if not ckpt_dir.exists():
        raise FileNotFoundError("checkpoints/ directory not found. Train the model first.")

    required = [
        ckpt_dir / "multi_head_siglip2_classifier.pth",
        ckpt_dir / "siglip2_processor",
    ]
    for path in required:
        if not path.exists():
            raise FileNotFoundError(f"Missing required artifact: {path}")

    # Check for task_config.json in checkpoints/ first, then the script directory
    task_config_path = ckpt_dir / "task_config.json"
    if not task_config_path.exists():
        task_config_path = root / "task_config.json"
        if not task_config_path.exists():
            raise FileNotFoundError(
                "Missing required artifact: task_config.json "
                "(checked both checkpoints/ and the script directory)"
            )

    staging = root / "hf_export"
    if staging.exists():
        shutil.rmtree(staging)
    staging.mkdir(parents=True)

    # Copy required artifacts
    shutil.copy2(ckpt_dir / "multi_head_siglip2_classifier.pth", staging / "model.pth")
    shutil.copy2(task_config_path, staging / "task_config.json")

    # Optional: full training checkpoint (for resuming training)
    train_ckpt = ckpt_dir / "training_checkpoint.pth"
    if train_ckpt.exists():
        shutil.copy2(train_ckpt, staging / "training_checkpoint.pth")

    # Optional: training history and validation summaries
    for optional_file in ["training_history.json", "validation_summary.json", "stage4_summary.json"]:
        optional_path = ckpt_dir / optional_file
        if optional_path.exists():
            shutil.copy2(optional_path, staging / optional_file)

    # Processor
    shutil.copytree(ckpt_dir / "siglip2_processor", staging / "processor")

    # Add README and example script if present
    readme_src = root / "README.md"
    if readme_src.exists():
        shutil.copy2(readme_src, staging / "README.md")
    example_src = root / "example.py"
    if example_src.exists():
        shutil.copy2(example_src, staging / "example.py")

    return staging
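
# For reference, a successful run stages roughly the following layout before
# upload (optional files appear only if they exist locally):
#
#   hf_export/
#   ├── model.pth
#   ├── task_config.json
#   ├── training_checkpoint.pth    (optional)
#   ├── training_history.json      (optional)
#   ├── validation_summary.json    (optional)
#   ├── stage4_summary.json        (optional)
#   ├── processor/
#   ├── README.md                  (optional)
#   └── example.py                 (optional)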

def upload_to_hub(private: bool = True) -> None:
    """Create the repo (if needed) and upload the staged artifacts."""
    token = ensure_logged_in()
    create_repo(REPO_ID, private=private, repo_type="model", exist_ok=True, token=token)
    staging = prepare_staging_dir()

    # Upload everything in the staging directory as a single commit
    upload_folder(
        folder_path=str(staging),
        repo_id=REPO_ID,
        repo_type="model",
        commit_message="Upload multi-head SigLIP2 classifier with dynamic task configuration",
        token=token,
    )
    print(f"Uploaded to https://huggingface.co/{REPO_ID} (private={private})")

if __name__ == "__main__":
    upload_to_hub(private=True)
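
# A minimal sketch of the consumer side, assuming read access to the private
# repo (filenames match what prepare_staging_dir() stages; hf_hub_download and
# snapshot_download are standard huggingface_hub helpers):
#
#   from huggingface_hub import hf_hub_download, snapshot_download
#
#   weights_path = hf_hub_download(repo_id="fal/multihead_cls", filename="model.pth")
#   config_path = hf_hub_download(repo_id="fal/multihead_cls", filename="task_config.json")
#   # Pull the processor folder in one go
#   local_dir = snapshot_download(repo_id="fal/multihead_cls", allow_patterns=["processor/*"])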