| import os |
| import argparse |
| import subprocess |
| from typing import Literal, Optional |
|
|
def submit_hf_job(
    image: str = "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel",
    flavor: str = "a10g-small",
    command: str = "python3 train.py",
    timeout: str = "2h",
    secrets: Optional[list] = None
):
    """Submit a job to Hugging Face Jobs by invoking ``hf jobs run``.

    Args:
        image: Container image, passed as the first positional argument.
        flavor: Value forwarded to ``--flavor``.
        command: Command line to run inside the job. It is tokenized with
            shell quoting rules, so a quoted argument containing spaces
            (e.g. ``--task 'Complex Reasoning'``) stays a single argument.
        timeout: Value forwarded to ``--timeout``.
        secrets: Extra names forwarded as additional ``--secrets`` options.
            ``HF_TOKEN`` is always forwarded.

    Raises:
        ValueError: If ``command`` contains unbalanced quotes.
        subprocess.CalledProcessError: If the CLI exits with a non-zero
            status.
    """
    # Imported locally so this function does not depend on a file-level
    # import being present.
    import shlex

    print(f"Submitting job to Hugging Face (Flavor: {flavor})")

    cmd = [
        "hf", "jobs", "run",
        "--flavor", flavor,
        "--timeout", timeout,
        "--secrets", "HF_TOKEN",
    ]

    for secret in secrets or ():
        cmd.extend(["--secrets", secret])

    # shlex.split honours quotes; a plain str.split() would break
    # "--task 'Complex Reasoning'" into two mangled tokens.
    cmd.append(image)
    cmd.extend(shlex.split(command))

    # Quote each token so the echoed line reflects the real argument
    # boundaries and can be pasted into a shell.
    print(f"Executing: {' '.join(shlex.quote(part) for part in cmd)}")
    subprocess.run(cmd, check=True)
|
|
def submit_kaggle_job(
    script_path: str,
    competition: Optional[str] = None,
    dataset_path: Optional[str] = None
):
    """Print the steps a Kaggle submission of ``script_path`` would take.

    This is a placeholder: nothing is uploaded and no external command is
    run. ``competition`` and ``dataset_path`` are accepted but currently
    unused.

    Args:
        script_path: Path of the script named in the first status line.
        competition: Unused.
        dataset_path: Unused.
    """
    status_lines = (
        f"Submitting script {script_path} to Kaggle...",
        "Step 1: Generate kernel-metadata.json",
        "Step 2: kaggle kernels push -p .",
    )
    for line in status_lines:
        print(line)
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the options, then hand off to the
    # submitter for the selected platform.
    cli = argparse.ArgumentParser(description="Unified Job Submission for Qwen Trainer")
    cli.add_argument("--platform", choices=["hf", "kaggle"], required=True)

    # (flag, extra keyword arguments) for the string-valued options.
    string_options = (
        ("--flavor", {"default": "a10g-small", "help": "HF Job flavor"}),
        ("--image", {"default": "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"}),
        ("--command", {"default": "python3 skills/qwen-trainer/scripts/train.py --model Qwen/Qwen3.5-7B --method grpo --use_agentic --task 'Complex Reasoning' --num_synthetic 100"}),
        ("--timeout", {"default": "2h"}),
    )
    for flag, extra in string_options:
        cli.add_argument(flag, type=str, **extra)

    options = cli.parse_args()

    if options.platform == "kaggle":
        # The Kaggle path always submits the trainer script; the
        # HF-specific options are not used here.
        submit_kaggle_job("skills/qwen-trainer/scripts/train.py")
    elif options.platform == "hf":
        submit_hf_job(
            image=options.image,
            flavor=options.flavor,
            command=options.command,
            timeout=options.timeout,
        )
|
|