mindchain's picture
Upload folder using huggingface_hub
78a0ca9 verified
import argparse
import os
import shlex
import subprocess
from typing import Literal, Optional
def submit_hf_job(
    image: str = "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel",
    flavor: str = "a10g-small",
    command: str = "python3 train.py",
    timeout: str = "2h",
    secrets: Optional[list] = None,
) -> None:
    """Submit a job to Hugging Face Jobs through the ``hf`` CLI.

    Assembles an ``hf jobs run`` argument list and executes it with
    ``subprocess.run``.

    Args:
        image: Container image, placed right after the options.
        flavor: Value passed to ``--flavor``.
        command: Command line to run in the image. It is tokenised with
            shell-style quoting rules, so ``--task 'Complex Reasoning'``
            yields the single argument ``Complex Reasoning``.
        timeout: Value passed to ``--timeout``.
        secrets: Extra secret names; each one is forwarded as its own
            ``--secrets`` option in addition to ``HF_TOKEN``, which is
            always passed.

    Raises:
        ValueError: If ``command`` contains unbalanced quotes.
        subprocess.CalledProcessError: If the CLI exits with a non-zero
            status (``check=True``).
    """
    print(f"Submitting job to Hugging Face (Flavor: {flavor})")

    cmd = [
        "hf", "jobs", "run",
        "--flavor", flavor,
        "--timeout", timeout,
        "--secrets", "HF_TOKEN",
    ]
    for secret in secrets or []:
        cmd.extend(["--secrets", secret])

    # str.split() would break quoted arguments apart and leave the quote
    # characters inside the tokens; shlex.split honours the quoting instead.
    cmd.append(image)
    cmd.extend(shlex.split(command))

    # Quote each token so the echoed line can be pasted back into a shell
    # and still produce the same argument list.
    print(f"Executing: {' '.join(shlex.quote(part) for part in cmd)}")
    subprocess.run(cmd, check=True)
def submit_kaggle_job(
    script_path: str,
    competition: Optional[str] = None,
    dataset_path: Optional[str] = None
):
    """Announce the steps needed to push a training script to Kaggle.

    This is a placeholder: nothing is sent to Kaggle. The function only
    prints the intended workflow. ``competition`` and ``dataset_path`` are
    accepted but not used yet.
    """
    # A real submission would generate a kernel-metadata.json next to the
    # script and then run `kaggle kernels push -p <kernel dir>`; that needs
    # a complete kernel directory, so for now only the intent is shown.
    progress = (
        f"Submitting script {script_path} to Kaggle...",
        "Step 1: Generate kernel-metadata.json",
        "Step 2: kaggle kernels push -p .",
    )
    for message in progress:
        print(message)
if __name__ == "__main__":
    # Command-line entry point: parse the options, then hand off to the
    # submit function for the selected platform.
    default_command = (
        "python3 skills/qwen-trainer/scripts/train.py --model Qwen/Qwen3.5-7B "
        "--method grpo --use_agentic --task 'Complex Reasoning' --num_synthetic 100"
    )

    cli = argparse.ArgumentParser(description="Unified Job Submission for Qwen Trainer")
    cli.add_argument("--platform", choices=["hf", "kaggle"], required=True)
    cli.add_argument("--flavor", type=str, default="a10g-small", help="HF Job flavor")
    cli.add_argument("--image", type=str, default="pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel")
    cli.add_argument("--command", type=str, default=default_command)
    cli.add_argument("--timeout", type=str, default="2h")
    opts = cli.parse_args()

    if opts.platform == "kaggle":
        # Kaggle would typically need the full script plus its dependencies;
        # only the script path is passed for now.
        submit_kaggle_job("skills/qwen-trainer/scripts/train.py")
    elif opts.platform == "hf":
        # Forward the HF-related options under their matching keyword names.
        hf_options = {
            key: getattr(opts, key)
            for key in ("image", "flavor", "command", "timeout")
        }
        submit_hf_job(**hf_options)