# Spaces: Runtime error
model:
  names:
  - ft_transformer
  - fusion_mlp
  - hf_text
  hf_text:
    checkpoint_name: local://hf_text
    gradient_checkpointing: false
    pooling_mode: cls
    data_types:
    - text
    tokenizer_name: hf_auto
    use_fast: true
    max_text_len: 512
    insert_sep: true
    low_cpu_mem_usage: false
    text_segment_num: 2
    stochastic_chunk: false
    text_aug_detect_length: 10
    text_trivial_aug_maxscale: 0.1
    text_train_augment_types: null
  fusion_mlp:
    aux_loss_weight: null
    adapt_in_features: max
    hidden_sizes:
    - 128
    activation: leaky_relu
    dropout: 0.1
    normalization: layer_norm
    data_types: null
  ft_transformer:
    data_types:
    - numerical
    embedding_arch:
    - linear
    token_dim: 192
    hidden_size: 192
    num_blocks: 3
    attention_num_heads: 8
    attention_dropout: 0.2
    residual_dropout: 0.0
    ffn_dropout: 0.1
    ffn_hidden_size: 192
    ffn_activation: geglu
    head_activation: relu
    normalization: layer_norm
    merge: concat
    requires_all_dtypes: false
    additive_attention: false
    share_qv_weights: false
    pooling_mode: cls
    checkpoint_name: null
data:
  image:
    missing_value_strategy: zero
  text:
    normalize_text: false
  categorical:
    minimum_cat_count: 100
    maximum_num_cat: 20
    convert_to_text: false
    convert_to_text_template: latex
  numerical:
    convert_to_text: false
    scaler_with_mean: true
    scaler_with_std: true
  document:
    missing_value_strategy: zero
  label:
    numerical_preprocessing: standardscaler
  pos_label: null
  column_features_pooling_mode: concat
  mixup:
    turn_on: false
    mixup_alpha: 0.8
    cutmix_alpha: 1.0
    cutmix_minmax: null
    prob: 1.0
    switch_prob: 0.5
    mode: batch
    turn_off_epoch: 5
    label_smoothing: 0.1
  modality_dropout: 0
  templates:
    turn_on: false
    num_templates: 30
    template_length: 2048
    preset_templates:
    - super_glue
    - rte
    custom_templates: null
optim:
  optim_type: adamw
  lr: 0.0001
  weight_decay: 0.001
  lr_choice: layerwise_decay
  lr_decay: 0.9
  lr_schedule: cosine_decay
  max_epochs: 20
  max_steps: -1
  warmup_steps: 0.1
  end_lr: 0
  lr_mult: 1
  patience: 10
  val_check_interval: 0.5
  check_val_every_n_epoch: 1
  skip_final_val: false
  gradient_clip_val: 1
  gradient_clip_algorithm: norm
  track_grad_norm: -1
  log_every_n_steps: 10
  label_smoothing: 0
  top_k: 3
  top_k_average_method: greedy_soup
  peft: null
  lora:
    module_filter: null
    filter:
    - query
    - value
    - ^q$
    - ^v$
    - ^k$
    - ^o$
    r: 8
    alpha: 8
    conv_lora_expert_num: 8
  loss_func: auto
  focal_loss:
    alpha: null
    gamma: 2.0
    reduction: mean
  mask2former_loss:
    loss_cross_entropy_weight: 10.0
    loss_mask_weight: 5.0
    loss_dice_weight: 5.0
  extra_trainable_params: []
  cross_modal_align: null
  cross_modal_align_weight: 0
  automatic_optimization: true
  lemda:
    turn_on: false
    arch_type: mlp_vae
    z_dim: 8
    num_layers: 6
    kld_weight: 0.1
    mse_weight: 0.1
    adv_weight: 0.0001
    consist_weight: 0.01
    consist_threshold: 0.5
    lr: 0.0001
    optim_type: adamw
    weight_decay: 1.0e-05
env:
  num_gpus: 2
  num_nodes: 1
  batch_size: 128
  per_gpu_batch_size: 8
  inference_batch_size_ratio: 4
  precision: 16-mixed
  num_workers: 2
  num_workers_inference: 2
  accelerator: auto
  fast_dev_run: false
  deterministic: false
  auto_select_gpus: true
  strategy: ddp_fork_find_unused_parameters_true
  deepspeed_allgather_size: 1000000000.0
  deepspeed_allreduce_size: 1000000000.0
  compile:
    turn_on: false
    mode: default
    dynamic: true
    backend: inductor