# AutoGluon MultiModal predictor configuration.
# (Header artifacts from the file viewer — size "2,915 Bytes", blob id
# "2812a91", and a line-number gutter — were removed; they are not YAML.)
---
model:
  # Backbones to use; each entry must have a matching config section below.
  names:
    - timm_image
  timm_image:
    # timm backbone identifier.
    checkpoint_name: efficientnet_b0
    # How per-image outputs are combined when a sample has several images
    # (presumably averaging all logits — confirm against the consumer).
    mix_choice: all_logits
    data_types:
      - image
    # Transforms applied during training only; val keeps a deterministic subset.
    train_transforms:
      - resize_shorter_side
      - center_crop
      - trivial_augment
    val_transforms:
      - resize_shorter_side
      - center_crop
    image_norm: imagenet
    # null -> fall back to the backbone's default input resolution.
    image_size: null
    image_chan_num: 3
    use_learnable_image: false
    max_image_num_per_column: 1
data:
  # Per-modality preprocessing.
  image:
    missing_value_strategy: zero
  text:
    normalize_text: false
  categorical:
    # Minimum occurrences for a category to be kept as its own level.
    minimum_cat_count: 100
    maximum_num_cat: 20
    convert_to_text: false
    convert_to_text_template: latex
  numerical:
    convert_to_text: false
    # StandardScaler options (center / scale).
    scaler_with_mean: true
    scaler_with_std: true
  document:
    missing_value_strategy: zero
  label:
    numerical_preprocessing: standardscaler
  # Positive class for binary metrics; null presumably means "infer" — confirm.
  pos_label: null
  column_features_pooling_mode: concat
  # Mixup/CutMix batch augmentation (disabled here).
  mixup:
    turn_on: false
    mixup_alpha: 0.8
    cutmix_alpha: 1.0
    cutmix_minmax: null
    prob: 1.0
    switch_prob: 0.5
    mode: batch
    # Disable mixup after this many epochs.
    turn_off_epoch: 5
    label_smoothing: 0.1
  modality_dropout: 0
  # Prompt templates for text tasks (disabled here).
  templates:
    turn_on: false
    num_templates: 30
    template_length: 2048
    preset_templates:
      - super_glue
      - rte
    custom_templates: null
optim:
  optim_type: adamw
  lr: 0.0001
  weight_decay: 0.001
  # Layer-wise LR decay: deeper (earlier) layers get lr * lr_decay^depth.
  lr_choice: layerwise_decay
  lr_decay: 0.9
  lr_schedule: cosine_decay
  max_epochs: 20
  # -1 -> no step cap; max_epochs governs training length.
  max_steps: -1
  # Fractional value: share of total steps used for LR warmup.
  warmup_steps: 0.1
  end_lr: 0
  lr_mult: 1
  # Early-stopping patience, counted in validation checks without improvement.
  patience: 10
  # 0.5 -> validate twice per epoch.
  val_check_interval: 0.5
  check_val_every_n_epoch: 1
  skip_final_val: false
  gradient_clip_val: 1
  gradient_clip_algorithm: norm
  track_grad_norm: -1
  log_every_n_steps: 10
  label_smoothing: 0
  # Average the best top_k checkpoints using greedy-soup selection.
  top_k: 3
  top_k_average_method: greedy_soup
  # Parameter-efficient fine-tuning method; null -> full fine-tuning.
  peft: null
  lora:
    module_filter: null
    # Name patterns selecting the modules LoRA adapts; '^q$'-style entries
    # look like anchored regexes — quoted so they stay plain strings.
    filter:
      - query
      - value
      - '^q$'
      - '^v$'
      - '^k$'
      - '^o$'
    r: 8
    alpha: 8
    conv_lora_expert_num: 8
  loss_func: auto
  focal_loss:
    alpha: null
    gamma: 2.0
    reduction: mean
  mask2former_loss:
    loss_cross_entropy_weight: 10.0
    loss_mask_weight: 5.0
    loss_dice_weight: 5.0
  extra_trainable_params: []
  cross_modal_align: null
  cross_modal_align_weight: 0
  automatic_optimization: true
  # LeMDA learned feature-space augmentation (disabled here).
  lemda:
    turn_on: false
    arch_type: mlp_vae
    z_dim: 8
    num_layers: 6
    kld_weight: 0.1
    mse_weight: 0.1
    adv_weight: 0.0001
    consist_weight: 0.01
    consist_threshold: 0.5
    lr: 0.0001
    optim_type: adamw
    weight_decay: 1.0e-05
env:
  num_gpus: 1
  num_nodes: 1
  # NOTE(review): batch_size appears to be the effective/global batch size and
  # per_gpu_batch_size the per-device size — confirm against the consumer.
  batch_size: 128
  per_gpu_batch_size: 8
  # Inference batches are this many times larger than training batches.
  inference_batch_size_ratio: 4
  precision: 16-mixed
  num_workers: 2
  num_workers_inference: 2
  accelerator: auto
  fast_dev_run: false
  deterministic: false
  auto_select_gpus: true
  strategy: auto
  deepspeed_allgather_size: 1000000000.0
  deepspeed_allreduce_size: 1000000000.0
  # torch.compile settings (disabled here).
  compile:
    turn_on: false
    mode: default
    dynamic: true
    backend: inductor