Spaces:

joeyaintjoking
/

ml_dart_model

Runtime error

App Files Files Community

ml_dart_model / ml_model /config.yaml

joeyaintjoking

initial commit

4aea73a about 1 month ago

raw

history blame contribute delete

3.69 kB

	model:
	names:
	- ft_transformer
	- fusion_mlp
	- hf_text
	hf_text:
	checkpoint_name: local://hf_text
	gradient_checkpointing: false
	pooling_mode: cls
	data_types:
	- text
	tokenizer_name: hf_auto
	use_fast: true
	max_text_len: 512
	insert_sep: true
	low_cpu_mem_usage: false
	text_segment_num: 2
	stochastic_chunk: false
	text_aug_detect_length: 10
	text_trivial_aug_maxscale: 0.1
	text_train_augment_types: null
	fusion_mlp:
	aux_loss_weight: null
	adapt_in_features: max
	hidden_sizes:
	- 128
	activation: leaky_relu
	dropout: 0.1
	normalization: layer_norm
	data_types: null
	ft_transformer:
	data_types:
	- numerical
	embedding_arch:
	- linear
	token_dim: 192
	hidden_size: 192
	num_blocks: 3
	attention_num_heads: 8
	attention_dropout: 0.2
	residual_dropout: 0.0
	ffn_dropout: 0.1
	ffn_hidden_size: 192
	ffn_activation: geglu
	head_activation: relu
	normalization: layer_norm
	merge: concat
	requires_all_dtypes: false
	additive_attention: false
	share_qv_weights: false
	pooling_mode: cls
	checkpoint_name: null
	data:
	image:
	missing_value_strategy: zero
	text:
	normalize_text: false
	categorical:
	minimum_cat_count: 100
	maximum_num_cat: 20
	convert_to_text: false
	convert_to_text_template: latex
	numerical:
	convert_to_text: false
	scaler_with_mean: true
	scaler_with_std: true
	document:
	missing_value_strategy: zero
	label:
	numerical_preprocessing: standardscaler
	pos_label: null
	column_features_pooling_mode: concat
	mixup:
	turn_on: false
	mixup_alpha: 0.8
	cutmix_alpha: 1.0
	cutmix_minmax: null
	prob: 1.0
	switch_prob: 0.5
	mode: batch
	turn_off_epoch: 5
	label_smoothing: 0.1
	modality_dropout: 0
	templates:
	turn_on: false
	num_templates: 30
	template_length: 2048
	preset_templates:
	- super_glue
	- rte
	custom_templates: null
	optim:
	optim_type: adamw
	lr: 0.0001
	weight_decay: 0.001
	lr_choice: layerwise_decay
	lr_decay: 0.9
	lr_schedule: cosine_decay
	max_epochs: 20
	max_steps: -1
	warmup_steps: 0.1
	end_lr: 0
	lr_mult: 1
	patience: 10
	val_check_interval: 0.5
	check_val_every_n_epoch: 1
	skip_final_val: false
	gradient_clip_val: 1
	gradient_clip_algorithm: norm
	track_grad_norm: -1
	log_every_n_steps: 10
	label_smoothing: 0
	top_k: 3
	top_k_average_method: greedy_soup
	peft: null
	lora:
	module_filter: null
	filter:
	- query
	- value
	- ^q$
	- ^v$
	- ^k$
	- ^o$
	r: 8
	alpha: 8
	conv_lora_expert_num: 8
	loss_func: auto
	focal_loss:
	alpha: null
	gamma: 2.0
	reduction: mean
	mask2former_loss:
	loss_cross_entropy_weight: 10.0
	loss_mask_weight: 5.0
	loss_dice_weight: 5.0
	extra_trainable_params: []
	cross_modal_align: null
	cross_modal_align_weight: 0
	automatic_optimization: true
	lemda:
	turn_on: false
	arch_type: mlp_vae
	z_dim: 8
	num_layers: 6
	kld_weight: 0.1
	mse_weight: 0.1
	adv_weight: 0.0001
	consist_weight: 0.01
	consist_threshold: 0.5
	lr: 0.0001
	optim_type: adamw
	weight_decay: 1.0e-05
	env:
	num_gpus: 2
	num_nodes: 1
	batch_size: 128
	per_gpu_batch_size: 8
	inference_batch_size_ratio: 4
	precision: 16-mixed
	num_workers: 2
	num_workers_inference: 2
	accelerator: auto
	fast_dev_run: false
	deterministic: false
	auto_select_gpus: true
	strategy: ddp_fork_find_unused_parameters_true
	deepspeed_allgather_size: 1000000000.0
	deepspeed_allreduce_size: 1000000000.0
	compile:
	turn_on: false
	mode: default
	dynamic: true
	backend: inductor