rope_vit_reg4_b14_capi / training_args.json
hassonofer's picture
Upload 2 files
dd3a1c8 verified
{
"cmdline": "birder/.venv/lib/python3.10/site-packages/birder/scripts/train_capi.py --network rope_vit_reg4_b14 --decoder-layers 8 --decoder-dim 768 --opt adamw --lr 0.001 --opt-betas 0.9 0.95 --lr-scheduler-update iter --lr-scheduler cosine --lr-cosine-min 1e-7 --warmup-epochs 20 --batch-size 1024 --epochs 200 --wd 0.1 --norm-wd 0.01 --amp --amp-dtype bfloat16 --compile --find-unused-parameters --keep-last 4 --wds --wds-info data/ssl_packed/_info.json",
"network": "rope_vit_reg4_b14",
"net_param": null,
"model_config": null,
"decoder_layers": 8,
"decoder_dim": 768,
"num_clusters": 16384,
"mask_ratio": 0.65,
"kept_mask_ratio": 0.05,
"momentum_teacher": 0.999,
"compile": true,
"compile_opt": false,
"opt": "adamw",
"momentum": 0.9,
"nesterov": false,
"opt_eps": null,
"opt_betas": [
0.9,
0.95
],
"opt_alpha": null,
"lr": 0.001,
"lr_scale": null,
"lr_scale_type": "linear",
"wd": 0.1,
"norm_wd": 0.01,
"bias_weight_decay": null,
"transformer_embedding_decay": null,
"layer_decay": null,
"lr_scheduler_update": "iter",
"lr_scheduler": "cosine",
"lr_step_size": 40,
"lr_steps": null,
"lr_step_gamma": 0.75,
"lr_cosine_min": 1e-07,
"lr_power": 1.0,
"grad_accum_steps": 1,
"channels": 3,
"size": [
224,
224
],
"batch_size": 1024,
"warmup_epochs": 20,
"aug_level": 1,
"rgb_mode": "birder",
"resize_min_scale": 0.6,
"epochs": 200,
"stop_epoch": 201,
"save_frequency": 1,
"keep_last": 4,
"resume_epoch": null,
"load_states": false,
"tag": null,
"log_interval": 50,
"num_workers": 16,
"img_loader": "tv",
"prefetch_factor": null,
"model_dtype": "float32",
"amp": true,
"amp_dtype": "bfloat16",
"fast_matmul": false,
"grad_anomaly_detection": false,
"world_size": 8,
"dist_url": "env://",
"find_unused_parameters": true,
"clip_grad_norm": null,
"gpu": 0,
"cpu": false,
"use_deterministic_algorithms": false,
"plot_lr": false,
"no_summary": false,
"data_path": [],
"wds": true,
"wds_info": "data/ssl_packed/_info.json",
"wds_cache_dir": null,
"wds_train_size": null,
"wds_split": "training",
"rank": 0,
"distributed": true,
"dist_backend": "nccl"
}