import argparse
import os
import shlex
import subprocess
from typing import Literal, Optional

# Secret name that every Hugging Face job submission passes via --secrets.
_ALWAYS_FORWARDED_SECRET = "HF_TOKEN"


def build_hf_command(
    image: str,
    flavor: str,
    command: str,
    timeout: str,
    secrets: Optional[list] = None,
) -> list:
    """Build the ``hf jobs run`` argument vector without executing it.

    Args:
        image: Container image the job runs in.
        flavor: Value passed to ``--flavor``.
        command: Command line to run inside the container. It is tokenized
            with shell-style quoting rules, so ``--task 'Complex Reasoning'``
            yields a single ``Complex Reasoning`` argument.
        timeout: Value passed to ``--timeout``.
        secrets: Extra secret names passed via ``--secrets`` in addition to
            ``HF_TOKEN``. Duplicates are dropped, first occurrence wins.

    Returns:
        The list of arguments, starting with ``"hf"``.

    Raises:
        ValueError: If ``command`` contains unbalanced quotes.
    """
    # dict.fromkeys de-duplicates while preserving order, so a caller that
    # lists HF_TOKEN explicitly does not produce a repeated flag.
    secret_names = dict.fromkeys([_ALWAYS_FORWARDED_SECRET, *(secrets or [])])

    cmd = ["hf", "jobs", "run", "--flavor", flavor, "--timeout", timeout]
    for name in secret_names:
        cmd.extend(["--secrets", name])

    cmd.append(image)
    # shlex.split honours quotes; a plain str.split() would break quoted
    # arguments that contain spaces into several argv entries.
    cmd.extend(shlex.split(command))
    return cmd


def submit_hf_job(
    image: str = "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel",
    flavor: str = "a10g-small",
    command: str = "python3 train.py",
    timeout: str = "2h",
    secrets: Optional[list] = None,
) -> None:
    """Submit a job to Hugging Face Jobs using the ``hf`` CLI.

    Args:
        image: Container image the job runs in.
        flavor: Hardware flavor passed to ``--flavor``.
        command: Command line to run inside the container (shell-style
            quoting is respected).
        timeout: Value passed to ``--timeout``.
        secrets: Extra secret names to pass via ``--secrets``; ``HF_TOKEN``
            is always included.

    Raises:
        ValueError: If ``command`` contains unbalanced quotes.
        FileNotFoundError: If the ``hf`` executable cannot be found.
        subprocess.CalledProcessError: If ``hf`` exits with a non-zero status.
    """
    print(f"Submitting job to Hugging Face (Flavor: {flavor})")

    cmd = build_hf_command(image, flavor, command, timeout, secrets)

    # shlex.join re-quotes arguments so the echoed line matches the real argv
    # and can be pasted into a shell as-is.
    print(f"Executing: {shlex.join(cmd)}")
    subprocess.run(cmd, check=True)


def submit_kaggle_job(
    script_path: str,
    competition: Optional[str] = None,
    dataset_path: Optional[str] = None,
) -> None:
    """Describe the steps for submitting a script to Kaggle (placeholder).

    Nothing is executed yet: the function only prints the intended steps.

    Args:
        script_path: Path of the script to submit; only echoed for now.
        competition: Currently unused.
        dataset_path: Currently unused.
    """
    # Kaggle submission is often for competitions, but general training
    # usually involves pushing a kernel/notebook.
    print(f"Submitting script {script_path} to Kaggle...")

    # TODO: generate a kernel-metadata.json and run
    # `kaggle kernels push -p <kernel dir>`; that needs a full directory
    # containing the metadata, so for now only the intent is shown.
    print("Step 1: Generate kernel-metadata.json")
    print("Step 2: kaggle kernels push -p .")


def main(argv: Optional[list] = None) -> None:
    """Parse command-line arguments and dispatch to the chosen platform.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Unified Job Submission for Qwen Trainer")
    parser.add_argument("--platform", choices=["hf", "kaggle"], required=True)
    parser.add_argument("--flavor", type=str, default="a10g-small", help="HF Job flavor")
    parser.add_argument("--image", type=str, default="pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel")
    parser.add_argument("--command", type=str, default="python3 skills/qwen-trainer/scripts/train.py --model Qwen/Qwen3.5-7B --method grpo --use_agentic --task 'Complex Reasoning' --num_synthetic 100")
    parser.add_argument("--timeout", type=str, default="2h")

    args = parser.parse_args(argv)

    if args.platform == "hf":
        submit_hf_job(
            image=args.image,
            flavor=args.flavor,
            command=args.command,
            timeout=args.timeout,
        )
    elif args.platform == "kaggle":
        # For Kaggle we'd typically need the full script plus deps.
        submit_kaggle_job("skills/qwen-trainer/scripts/train.py")


if __name__ == "__main__":
    main()