# Training / optimization hyperparameters.
# One key per line: a block mapping cannot place several `key: value`
# pairs on the same line.

# Optimizer and learning-rate schedule.
optim_type: adamw
lr_choice: layerwise_decay
lr_schedule: cosine_decay
lr: 0.0001
lr_decay: 0.9  # NOTE(review): presumably the per-layer factor for layerwise_decay - confirm
end_lr: 0
lr_mult: 1
weight_decay: 0.001
warmup_steps: 0.1  # NOTE(review): fractional; presumably a ratio of total steps, not a count - confirm

# Validation and training-loop behaviour.
validation_metric_name: accuracy
peft: null
mixup_off_epoch: 5
skip_final_val: false
track_grad_norm: -1  # NOTE(review): -1 looks like a "disabled" sentinel - confirm with the consumer

# Cross-modal alignment (unset here: null target, zero weight).
cross_modal_align: null
cross_modal_align_weight: 0

# Gradient handling.
automatic_optimization: true
accumulate_grad_batches: 8
gradient_clip_val: 1
gradient_clip_algorithm: norm

# Separate optimizer settings for the "aug" parameters; switched off by use_aug_optim.
use_aug_optim: false
aug_lr: 0.0001
aug_weight_decay: 1.0e-05
aug_optim_type: adamw