# OSUM-EChat — conf/ct_config.yaml
# Provenance: uploaded by xlgeng, commit 841f290 ("开始部署" — initial deployment).
model: osum_echat
# Path (or HuggingFace hub id) of the backbone LLM; anchored so
# tokenizer_conf below reuses exactly the same value.
llm_path: &llm_path "Qwen/Qwen2.5-3B-Instruct"

# model config
downsample_rate: 4  # speech-feature downsampling factor; supported values: 1 2 4 8
adapter_type: osum_echat
if_instruct: true
input_dim: 80  # mel filterbank bins fed to the encoder (matches num_mel_bins below)
# tokenizer config
tokenizer: huggingface
tokenizer_conf:
  llm_path: *llm_path  # same checkpoint as the LLM, via the anchor above
# lora config
use_lora: false
lora_alpha: 32
lora_rank: 64  # rank 64 on the 3B model -> roughly 85M trainable LoRA params
lora_dropout: 0.1
# speech generation config: size of the discrete speech-token vocabulary,
# anchored so dataset_conf.speech_token_num stays in sync.
speech_token_num: &token_num 4097
# Which modules are trainable in this stage.
# Choices: link / encoder / llm / link_and_encoder / link_and_encoder_and_lora.
# The llm / *_lora variants require use_lora: true.
fire_module: link_and_encoder_and_lora
# other config
grad_clip: 5         # gradient-norm clipping threshold
accum_grad: 8        # gradient accumulation steps (effective batch = batch_size x 8)
log_interval: 10     # log training stats every N steps
save_interval: 1250  # save a checkpoint every N steps
max_epoch: 1
init_step: true      # reset the global step counter when (re)starting
# training config
optim: adamw
optim_conf:
  betas:
    - 0.9
    - 0.99
  eps: 1.0e-06
  lr: 1.0e-06
  weight_decay: 0.01
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 2000  # linear LR warmup before decay
dataset: asr
dataset_conf:
  speech_token_num: *token_num  # kept in sync with top-level speech_token_num via anchor
  batch_conf:
    batch_size: 26
    batch_type: dynamic           # dynamic batching bounded by the two limits below
    max_frames_in_batch: 28000000
    max_seq_in_batch: 3700
  feats_type: log_mel_spectrogram
  filter_conf:
    max_length: 20000   # max utterance length (frames)
    min_length: 20      # min utterance length (frames)
    token_max_length: 1200
    token_min_length: 1
    # Drop samples that carry no extra info (task / lang tags etc.).
    # Intended for generic multi-task training; disable at inference time.
    filter_no_extra_info: true
    max_seq_len: 2000
    other_filter_conf:
      only_s2s: false  # filtering applied only to the s2s dataloader
      only_s2t: false  # filtering applied only to the s2t dataloader
      only_t2t: false  # filtering applied only to the t2t dataloader
      only_t2s: false  # filtering applied only to the t2s dataloader
  language_conf:
    limited_langs:
      - zh
  log_mel_spectrogram_conf:
    hop_length: 160   # 10 ms hop at 16 kHz
    n_fft: 400
    num_mel_bins: 80  # must match input_dim above
    padding: 0
  resample_conf:
    resample_rate: 16000
  shuffle: true
  shuffle_conf:
    shuffle_size: 1500
  sort: true
  sort_conf:
    sort_size: 500
  spec_aug: true
  spec_aug_conf:
    max_f: 10
    max_t: 50
    num_f_mask: 2
    num_t_mask: 2
  spec_sub: true
  spec_sub_conf:
    max_t: 30
    num_t_sub: 3
  spec_trim: false
  speed_perturb: false
  eod_id: 151645  # end-of-dialog token id; presumably Qwen2.5's <|im_end|> — verify against tokenizer
  split_num: 1
  multi_num: 2
  prompt_conf_path: conf/prompt_config.yaml
  data_recover: false
  data_recover_conf:
    start_idx: 0  # when recovering, skip the first start_idx items (tar shards)
  # Toggles for extra data-side transforms; normally all false at test time.
  # NOTE(review): key name looks like a typo of "other_tokenize_conf", but the
  # dataloader reads this exact key — do not rename without updating the consumer.
  other_tokenze_conf:
    only_info:
      only_s2s: false  # restrict to the s2s dataloader only
      only_s2t: false  # restrict to the s2t dataloader only
      only_t2t: false  # restrict to the t2t dataloader only
      only_t2s: false  # restrict to the t2s dataloader only
      use_50_per_change_if_only_X: true  # randomly convert 50% of sentences to their only-X variant
    use_s2s_streaming_random:
      enable: false
      rate: 0.5  # 1.0 means 100% of sentences are randomly converted
    natural_language_convert:
      enable: false
      rate: 0.00  # 1.0 means 100% converted to natural-language mode
    use_s2s_convert_s2t:
      enable: false  # enable s2s-style conversion specifically for the s2t dataloader
      rate: 1.0  # 1.0 means 100% of sentences are randomly converted
    use_streaming_tts:
      enable: false
      rate: 0.5  # 1.0 means 100% of sentences are randomly converted
    use_think_mode:
      enable: false  # enable think mode, i.e. randomly replace sentences with think-mode versions
      rate: 0.8
  other_filter_conf:
    # Drop audio whose text is "<NONE>"; added because part of the gender data
    # carries the <NONE> tag. Only active during training.
    # NOTE(review): "fiter" looks like a typo of "filter", but the dataloader
    # reads this exact key — do not rename without updating the consumer.
    fiter_txt_is_None: true
# model config for the speech encoder (Whisper-style Transformer)
encoder: transformer
encoder_conf:
  activation_type: gelu
  attention_dropout_rate: 0.0
  attention_heads: 16
  dropout_rate: 0.1
  gradient_checkpointing: true  # recompute activations in backward to save memory
  input_layer: conv1d2          # 2-layer conv1d front-end (subsamples the input)
  key_bias: false
  linear_units: 4096            # FFN hidden size
  normalize_before: true        # pre-LayerNorm transformer blocks
  num_blocks: 24
  output_size: 1024             # encoder hidden size
  pos_enc_layer_type: abs_pos_whisper
  positional_dropout_rate: 0.1
  static_chunk_size: -1         # -1: no static chunking (full-context attention)
  use_dynamic_chunk: false
  use_dynamic_left_chunk: false