File size: 3,671 Bytes
b72fefd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/env python3
"""
Upload trained multi-head SigLIP2 classifier to Hugging Face Hub (private).

This script will create/update the repo `fal/multihead_cls` and push:
- model weights: checkpoints/multi_head_siglip2_classifier.pth
- full training checkpoint: checkpoints/training_checkpoint.pth (optional)
- processor folder: checkpoints/siglip2_processor/
- README.md with usage

Auth: Set HUGGINGFACE_TOKEN environment variable or run `huggingface-cli login`.
"""

import os
import shutil
from pathlib import Path
from typing import Optional

from huggingface_hub import HfApi, HfFolder, create_repo, upload_folder, upload_file


# Hub model repository that receives the upload, in "<namespace>/<name>" form.
REPO_ID = "fal/multihead_cls"


def ensure_logged_in() -> Optional[str]:
    """Return a Hugging Face access token, or fail if none can be found.

    The ``HUGGINGFACE_TOKEN`` environment variable is consulted first. When it
    is unset or empty, the value of ``HfFolder.get_token()`` is used instead.

    Returns:
        The first non-empty token found.

    Raises:
        RuntimeError: If neither source yields a token.
    """
    token = os.getenv("HUGGINGFACE_TOKEN")
    if not token:
        # Environment variable missing or empty: fall back to the stored token.
        token = HfFolder.get_token()
    if token:
        return token
    raise RuntimeError(
        "No Hugging Face token found. Set HUGGINGFACE_TOKEN or run `huggingface-cli login`."
    )


def prepare_staging_dir() -> Path:
    """Build a fresh ``hf_export/`` folder containing every file to upload.

    All paths are resolved relative to the directory holding this script.
    Required inputs are ``checkpoints/multi_head_siglip2_classifier.pth``,
    ``checkpoints/siglip2_processor`` and a ``task_config.json`` located in
    ``checkpoints/`` or, failing that, next to the script. Optional files are
    copied only when they exist.

    Returns:
        Path of the populated staging directory.

    Raises:
        FileNotFoundError: If ``checkpoints/`` or any required artifact is
            missing.
    """
    base = Path(__file__).parent
    checkpoints = base / "checkpoints"
    if not checkpoints.exists():
        raise FileNotFoundError("checkpoints/ directory not found. Train the model first.")

    weights_src = checkpoints / "multi_head_siglip2_classifier.pth"
    processor_src = checkpoints / "siglip2_processor"
    for artifact in (weights_src, processor_src):
        if not artifact.exists():
            raise FileNotFoundError(f"Missing required artifact: {artifact}")

    # task_config.json: the copy in checkpoints/ wins over the one beside the script.
    for candidate in (checkpoints / "task_config.json", base / "task_config.json"):
        if candidate.exists():
            task_config_src = candidate
            break
    else:
        raise FileNotFoundError("Missing required artifact: task_config.json (checked both checkpoints/ and root directory)")

    # Start from an empty export folder so leftovers of a previous run are discarded.
    export_dir = base / "hf_export"
    if export_dir.exists():
        shutil.rmtree(export_dir)
    export_dir.mkdir(parents=True)

    # Required artifacts; the weights are renamed to model.pth in the export.
    shutil.copy2(weights_src, export_dir / "model.pth")
    shutil.copy2(task_config_src, export_dir / "task_config.json")

    # Optional checkpoint and metadata files keep their original names.
    optional_names = (
        "training_checkpoint.pth",
        "training_history.json",
        "validation_summary.json",
        "stage4_summary.json",
    )
    for name in optional_names:
        source = checkpoints / name
        if source.exists():
            shutil.copy2(source, export_dir / name)

    # The processor is exported under the shorter name "processor".
    shutil.copytree(processor_src, export_dir / "processor")

    # README and example script sit next to this file and are optional.
    for name in ("README.md", "example.py"):
        source = base / name
        if source.exists():
            shutil.copy2(source, export_dir / name)

    return export_dir


def upload_to_hub(private: bool = True) -> None:
    """Stage the local artifacts and push them to ``REPO_ID`` on the Hub.

    Staging runs before any Hub call, so a missing local artifact aborts the
    run without leaving an empty repository behind.

    Args:
        private: Visibility requested when the repository is created.
            ``create_repo`` is called with ``exist_ok=True``.
            NOTE(review): an already-existing repository presumably keeps its
            current visibility, so the final message may not reflect the real
            state — confirm against the huggingface_hub documentation.

    Raises:
        RuntimeError: If no access token is available (raised by
            ``ensure_logged_in``).
        FileNotFoundError: If a required artifact is missing (raised by
            ``prepare_staging_dir``).
    """
    token = ensure_logged_in()

    # Validate and stage everything locally first; only touch the Hub once the
    # export folder is known to be complete.
    staging = prepare_staging_dir()

    create_repo(REPO_ID, private=private, repo_type="model", exist_ok=True, token=token)

    # Upload all files in staging as a single commit.
    upload_folder(
        folder_path=str(staging),
        repo_id=REPO_ID,
        repo_type="model",
        commit_message="Upload multi-head SigLIP2 classifier with dynamic task configuration",
        token=token,
    )

    print(f"Uploaded to https://huggingface.co/{REPO_ID} (private={private})")


# Script entry point: running this file directly performs the upload with private=True.
if __name__ == "__main__":
    upload_to_hub(private=True)