---
# AutoGluon MultiModal predictor configuration (model / data / optim / env).
# Model stack: a tabular FT-Transformer and a HuggingFace text backbone,
# fused by an MLP late-fusion head.
model:
  names:
    - ft_transformer
    - fusion_mlp
    - hf_text
  # HuggingFace text backbone (checkpoint loaded from a local path).
  hf_text:
    checkpoint_name: local://hf_text
    gradient_checkpointing: false
    pooling_mode: cls
    data_types:
      - text
    tokenizer_name: hf_auto
    use_fast: true
    max_text_len: 512
    insert_sep: true
    low_cpu_mem_usage: false
    text_segment_num: 2
    stochastic_chunk: false
    # Text augmentation: only applied to texts longer than the detect length.
    text_aug_detect_length: 10
    text_trivial_aug_maxscale: 0.1
    text_train_augment_types: null
  # MLP that fuses the per-modality features.
  fusion_mlp:
    aux_loss_weight: null
    adapt_in_features: max
    hidden_sizes:
      - 128
    activation: leaky_relu
    dropout: 0.1
    normalization: layer_norm
    data_types: null
  # FT-Transformer for numerical (tabular) features.
  ft_transformer:
    data_types:
      - numerical
    embedding_arch:
      - linear
    token_dim: 192
    hidden_size: 192
    num_blocks: 3
    attention_num_heads: 8
    attention_dropout: 0.2
    residual_dropout: 0.0
    ffn_dropout: 0.1
    ffn_hidden_size: 192
    ffn_activation: geglu
    head_activation: relu
    normalization: layer_norm
    merge: concat
    requires_all_dtypes: false
    additive_attention: false
    share_qv_weights: false
    pooling_mode: cls
    checkpoint_name: null
# Per-modality preprocessing and training-time data augmentation.
data:
  image:
    missing_value_strategy: zero
  text:
    normalize_text: false
  categorical:
    # Categories rarer than minimum_cat_count are dropped/merged.
    minimum_cat_count: 100
    maximum_num_cat: 20
    convert_to_text: false
    convert_to_text_template: latex
  numerical:
    convert_to_text: false
    scaler_with_mean: true
    scaler_with_std: true
  document:
    missing_value_strategy: zero
  label:
    numerical_preprocessing: standardscaler
  pos_label: null
  column_features_pooling_mode: concat
  # Mixup/CutMix augmentation (disabled here via turn_on).
  mixup:
    turn_on: false
    mixup_alpha: 0.8
    cutmix_alpha: 1.0
    cutmix_minmax: null
    prob: 1.0
    switch_prob: 0.5
    mode: batch
    turn_off_epoch: 5
    label_smoothing: 0.1
  modality_dropout: 0
  # Prompt-template augmentation (disabled here via turn_on).
  templates:
    turn_on: false
    num_templates: 30
    template_length: 2048
    preset_templates:
      - super_glue
      - rte
    custom_templates: null
# Optimization: AdamW with layerwise LR decay and a cosine schedule;
# checkpoint averaging via greedy soup over the top-k checkpoints.
optim:
  optim_type: adamw
  lr: 0.0001
  weight_decay: 0.001
  lr_choice: layerwise_decay
  lr_decay: 0.9
  lr_schedule: cosine_decay
  max_epochs: 20
  max_steps: -1
  # Fractional value: warmup as a fraction of total training steps.
  warmup_steps: 0.1
  end_lr: 0
  lr_mult: 1
  patience: 10
  val_check_interval: 0.5
  check_val_every_n_epoch: 1
  skip_final_val: false
  gradient_clip_val: 1
  gradient_clip_algorithm: norm
  track_grad_norm: -1
  log_every_n_steps: 10
  label_smoothing: 0
  top_k: 3
  top_k_average_method: greedy_soup
  # Parameter-efficient fine-tuning is off; lora settings below apply
  # only when peft selects a LoRA variant.
  peft: null
  lora:
    module_filter: null
    filter:
      - query
      - value
      - ^q$
      - ^v$
      - ^k$
      - ^o$
    r: 8
    alpha: 8
    conv_lora_expert_num: 8
  loss_func: auto
  focal_loss:
    alpha: null
    gamma: 2.0
    reduction: mean
  mask2former_loss:
    loss_cross_entropy_weight: 10.0
    loss_mask_weight: 5.0
    loss_dice_weight: 5.0
  extra_trainable_params: []
  cross_modal_align: null
  cross_modal_align_weight: 0
  automatic_optimization: true
  # LeMDA feature-space augmentation network (disabled here via turn_on).
  lemda:
    turn_on: false
    arch_type: mlp_vae
    z_dim: 8
    num_layers: 6
    kld_weight: 0.1
    mse_weight: 0.1
    adv_weight: 0.0001
    consist_weight: 0.01
    consist_threshold: 0.5
    lr: 0.0001
    optim_type: adamw
    weight_decay: 1.0e-05
# Runtime environment: 2 GPUs on a single node, mixed precision,
# fork-based DDP strategy; torch.compile disabled.
env:
  num_gpus: 2
  num_nodes: 1
  # Effective batch size; gradient accumulation covers the gap to
  # per_gpu_batch_size * num_gpus.
  batch_size: 128
  per_gpu_batch_size: 8
  inference_batch_size_ratio: 4
  precision: 16-mixed
  num_workers: 2
  num_workers_inference: 2
  accelerator: auto
  fast_dev_run: false
  deterministic: false
  auto_select_gpus: true
  strategy: ddp_fork_find_unused_parameters_true
  deepspeed_allgather_size: 1000000000.0
  deepspeed_allreduce_size: 1000000000.0
  compile:
    turn_on: false
    mode: default
    dynamic: true
    backend: inductor