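# Source: OSUM-EChat / conf / data_s2t.yaml
# Web-page residue, kept as comments so the file parses as YAML:
#   xlgeng's picture
#   开始部署 (start deployment)
#   841f290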
# Age / gender data, "thinking" variant (all paths are under s2t_thinking/).
# Each entry is a mapping: `path` points at a combines_list.txt and `tar_num`
# is presumably the number of tar shards it lists — TODO confirm with the loader.
age_gender_common:
  path: /home/A02_tmpdata3/osum_s2s/gender/xlgeng_new_data/s2t_thinking/doubao/combines_list.txt
  tar_num: 1511
gender_xianshi:
  path: /home/A02_tmpdata3/osum_s2s/sex_xianshi_cosyvoice2_by_cywang_added_by_20250625/raw_data/s2s_handle/xlgeng_new_data/s2t_thinking/doubao/combines_list.txt
  tar_num: 30
gender_yinshi_3k:
  path: /home/A02_tmpdata3/osum_s2s/S2SChat_sex_yinshi_7_4_osum_by_cywang_added_by_20250708/raw_data/s2s_handle/xlgeng_new_data/s2t_thinking/doubao/combines_list.txt
  tar_num: 3
gender_yinshi_5k:
  path: /home/A02_tmpdata3/osum_s2s/S2SChat_sex_yinshi_5000_6_13_data_by_gjli_added_by_20250622/raw_data/s2s_handle/xlgeng_new_data/s2t_thinking/doubao/combines_list.txt
  tar_num: 6
age_xianshi:
  path: /home/A02_tmpdata3/osum_s2s/age_xianshi_cosyvoice2_by_cywang_added_by_20250625/raw_data/s2s_handle/xlgeng_new_data/s2t_thinking/doubao/combines_list.txt
  tar_num: 25
# Caption data, "thinking" variant (all paths are under s2t_thinking/).
# `weight` is presumably a sampling weight applied by the loader — TODO confirm.
caption_common_7label:
  path: /home/A02_tmpdata3/osum_s2s/caption/raw_data/xlgeng_new_data/s2t_thinking/doubao/combines_list.txt
  tar_num: 162
caption_common_50_label:
  path: /home/A02_tmpdata3/osum_s2s/caption_add_2025_1_6/raw_data/s2s_data_with_gender/xlgeng_new_data/s2t_thinking/doubao/combines_list.txt
  tar_num: 395  # actually 196k
caption_xianshi:
  path: /home/A02_tmpdata3/osum_s2s/caption_s2s_xianshi_20250806/raw_data/s2s_data/xlgeng_new_data/s2t_thinking/doubao/combines_list.txt
  tar_num: 6
  weight: 10
# Emotion data and the setting-QA set, "thinking" variant (paths under s2t_thinking/).
emotion_100K_sensevoice:
  path: /home/A02_tmpdata3/osum_s2s/emotion_yinshi_zxzhao_with_q_emo_by_cywang_added_by_20250701/handle_data/s2s_handle/xlgeng_new_data/s2t_thinking/doubao/combines_list.txt
  tar_num: 107
  weight: 10
emotion_30K_sensevoice:
  path: /home/A02_tmpdata3/emotion/中英混多音色情感数据库/s2s_handle/xlgeng_new_data/s2t_thinking/doubao/combines_list.txt
  tar_num: 33
  weight: 10
S2SChat_osum_setting_qa_527_updated_by_cywang_added_by_20250616_think:
  path: /home/A02_tmpdata3/osum_s2s/S2SChat_osum_setting_qa_527_updated_by_cywang_added_by_20250616/raw_data/s2s_handle/xlgeng_new_data/s2t_thinking/doubao/combines_list.txt
  # NOTE(review): sibling combines_list.txt entries use `tar_num`; this one uses
  # `shard_num` — confirm the loader accepts both names.
  shard_num: 8
  weight: 10
# ====================================== s2s paralinguistic, thinking: end =====================================
# =========================== paralinguistic s2s, no thinking ===================================
# Age / gender data, "no thinking" variant (all paths are under s2t_no_thinking/).
# Each entry is a mapping: `path` points at a combines_list.txt and `tar_num`
# is presumably the number of tar shards it lists — TODO confirm with the loader.
age_gender_common_no_thinking:
  path: /home/A02_tmpdata3/osum_s2s/gender/xlgeng_new_data/s2t_no_thinking/doubao/combines_list.txt
  tar_num: 1511
gender_xianshi_no_thinking:
  path: /home/A02_tmpdata3/osum_s2s/sex_xianshi_cosyvoice2_by_cywang_added_by_20250625/raw_data/s2s_handle/xlgeng_new_data/s2t_no_thinking/doubao/combines_list.txt
  tar_num: 30
gender_yinshi_3k_no_thinking:
  path: /home/A02_tmpdata3/osum_s2s/S2SChat_sex_yinshi_7_4_osum_by_cywang_added_by_20250708/raw_data/s2s_handle/xlgeng_new_data/s2t_no_thinking/doubao/combines_list.txt
  tar_num: 3
gender_yinshi_5k_no_thinking:
  path: /home/A02_tmpdata3/osum_s2s/S2SChat_sex_yinshi_5000_6_13_data_by_gjli_added_by_20250622/raw_data/s2s_handle/xlgeng_new_data/s2t_no_thinking/doubao/combines_list.txt
  tar_num: 6
age_xianshi_no_thinking:
  path: /home/A02_tmpdata3/osum_s2s/age_xianshi_cosyvoice2_by_cywang_added_by_20250625/raw_data/s2s_handle/xlgeng_new_data/s2t_no_thinking/doubao/combines_list.txt
  tar_num: 25
# Caption data, "no thinking" variant (all paths are under s2t_no_thinking/).
caption_common_7label_no_thinking:
  path: /home/A02_tmpdata3/osum_s2s/caption/raw_data/xlgeng_new_data/s2t_no_thinking/doubao/combines_list.txt
  tar_num: 162
caption_common_50_label_no_thinking:
  path: /home/A02_tmpdata3/osum_s2s/caption_add_2025_1_6/raw_data/s2s_data_with_gender/xlgeng_new_data/s2t_no_thinking/doubao/combines_list.txt
  tar_num: 395  # actually 196k
caption_xianshi_no_thinking:
  path: /home/A02_tmpdata3/osum_s2s/caption_s2s_xianshi_20250806/raw_data/s2s_data/xlgeng_new_data/s2t_no_thinking/doubao/combines_list.txt
  tar_num: 6
  # NOTE(review): the thinking counterpart `caption_xianshi` sets `weight: 10`;
  # no weight is given here — confirm this asymmetry is intended.
# Emotion data and the setting-QA set, "no thinking" variant (paths under s2t_no_thinking/).
emotion_100K_sensevoice_no_thinking:
  path: /home/A02_tmpdata3/osum_s2s/emotion_yinshi_zxzhao_with_q_emo_by_cywang_added_by_20250701/handle_data/s2s_handle/xlgeng_new_data/s2t_no_thinking/doubao/combines_list.txt
  tar_num: 107
  weight: 10
emotion_30K_sensevoice_no_thinking:
  path: /home/A02_tmpdata3/emotion/中英混多音色情感数据库/s2s_handle/xlgeng_new_data/s2t_no_thinking/doubao/combines_list.txt
  tar_num: 33
  weight: 10
S2SChat_osum_setting_qa_527_updated_by_cywang_added_by_20250616:
  path: /home/A02_tmpdata3/osum_s2s/S2SChat_osum_setting_qa_527_updated_by_cywang_added_by_20250616/raw_data/s2s_handle/xlgeng_new_data/s2t_no_thinking/doubao/combines_list.txt
  # NOTE(review): sibling combines_list.txt entries use `tar_num`; this one uses
  # `shard_num` — confirm the loader accepts both names.
  shard_num: 8
  weight: 10
# ------------------------------------------- s2s paralinguistic, no thinking: end -------------------------------------------
# Knowledge question answering (paths under combines_data_s2t/).
S2SChat_syndata_merged_by_300W_zhguo_added_by_20250616:
  path: /home/A02_tmpdata3/osum_s2s/S2SChat_syndata_merged_by_300W_zhguo_added_by_20250616/combines_data_s2t/combines_list.txt
  tar_num: 3000
S2SChat_osum_total_data_lst_check_final_100W_by_zhguo_added_by_20250616:
  path: /home/A02_tmpdata3/osum_s2s/S2SChat_osum_total_data_lst_check_final_100W_by_zhguo_added_by_20250616/combines_data_s2t/combines_list.txt
  tar_num: 1000
# ====================================== s2t paralinguistic, no thinking: end ==========================
# Speech understanding ==========================================
# Each entry lists a shards_list.txt under up to three keys (`path`, `lab_path`,
# `huawei_path`) — presumably the same dataset on different storage sites; which
# key is read is decided by the loader, not visible here. TODO confirm.
asr:
  huawei_path: "/mnt/sfs/asr/asr/shards_list.txt"  # 2.4
  lab_path: "/home/node54_tmpdata/xlgeng/asr_data_2w/shards_list.txt"
  path: "/home/A03_tmpdata1/s2s/asr_data_2.4w/asr_data_2w/shards_list.txt"
  shard_num: 15477
  weight: 0.1  # ~10000h
# =========== Understanding tasks ==============================================
librispeech:
  huawei_path: "/mnt/sfs/asr/update_data/LibriSpeech_shard_common/shards_list.txt"  # 1000h
  lab_path: "/home/work_nfs15/asr_data/data/LibriSpeech/LibriSpeech_shard_common/shards_list.txt"
  path: "/home/A03_tmpdata3/asr_data/librispeech/shards_list.txt"
  shard_num: 282
  weight: 1
mix_asru200_add_2025_2_14:
  huawei_path: "/mnt/sfs/asr/update_data/mix_asru200_add_2025_2_14/shards_list.txt"  # 200
  path: "/home/A03_tmpdata1/s2s/asru700/train/shards_list.txt"
  lab_path: "/home/work_nfs15/asr_data/data/ASRU700/train/shards_list.txt"  # Chinese-English code-switched text; words are separated by spaces
  shard_num: 187
  weight: 1
# Caption understanding datasets. Each entry gives a shards_list.txt under
# `path` / `lab_path` / `huawei_path`, plus `shard_num` and `weight`
# (presumably shard count and sampling weight — TODO confirm with the loader).
caption:
  path: "/home/A02_tmpdata3/osum_s2s/caption/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/caption/shards_list.txt"  # 319h
  # The comment below needs whitespace before `#`; it previously touched the closing quote.
  lab_path: "/home/node54_tmpdata2/data4understand/update_data/caption/shards_list.txt"  # concatenation of cap audio set + aishell2
  shard_num: 319
  weight: 0.5
caption_add_2025_1_6:
  path: "/home/A02_tmpdata3/osum_s2s/caption_add_2025_1_6/shards_list.txt"
  lab_path: "/home/work_nfs7/yacao/0106_twj_shard/shards_0306/add_label/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/caption_2025_1_6_newadd/shards_list.txt"  # 130h
  shard_num: 392
  weight: 0.5
caption_aslp_add_2025_1_15:
  path: "/home/A02_tmpdata3/osum_s2s/caption_aslp_add_2025_1_15/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/caption_aslp_add_2025_1_15/shards_list.txt"  # 5h
  shard_num: 5
  lab_path: "/home/work_nfs9/yacao/nfs7_copy/yacao/shard/0114_wjtian_simu2/aslp_caption_train/shards_list.txt"
  weight: 5
# Caption with 50 categories
s2t_caption_50label:
  shard_num: 392
  path: "/home/A02_tmpdata3/osum_s2s/s2t_caption_50label/shards_list.txt"
  # NOTE(review): same lab_path as caption_add_2025_1_6 above — confirm intended.
  lab_path: "/home/work_nfs7/yacao/0106_twj_shard/shards_0306/add_label/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/0106_twj_shard_caption_50label_add_by_2025_3_10/shards_list.txt"  # 392tar
  weight: 0.5  # 10
# Emotion understanding datasets. Each entry gives a shards_list.txt under
# `path` / `lab_path` / `huawei_path`, plus `shard_num` and `weight`
# (presumably shard count and sampling weight — TODO confirm with the loader).
emotion:  # incomplete, 312tar
  path: "/home/A02_tmpdata3/osum_s2s/emotion/shards_list.txt"
  lab_path: "/home/xlgeng/sdb2/emotion/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/emotion/shards_list.txt"
  shard_num: 370
  weight: 0.5  # 538h
emotion_stage2_add:
  path: "/home/A02_tmpdata3/osum_s2s/emotion_stage2_add/shards_list.txt"
  lab_path: "/home/xlgeng/sdb2/emotion_stage2_add/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/emotion_stage2_add/shards_list.txt"
  shard_num: 44
  weight: 0.1  # 150h
emotion_stage3_add:
  path: "/home/A02_tmpdata3/osum_s2s/emotion_stage3_add/shards_list.txt"
  lab_path: "/home/xlgeng/sdb2/emotion_stage3_add/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/emotion_stage3_add/shards_list.txt"
  shard_num: 53
  weight: 0.1  # 138h
emotion_stage4_add:
  path: "/home/A02_tmpdata3/osum_s2s/emotion_stage4_add/shards_list.txt"
  lab_path: "/home/xlgeng/sdb2/emotion_stage4_add/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/emotion_stage4_add/shards_list.txt"
  shard_num: 54
  weight: 0.1  # 100h
emotion_stage5_add:
  path: "/home/A02_tmpdata3/osum_s2s/emotion_stage5_add/shards_list.txt"
  lab_path: "/home/xlgeng/sdb2/emotion_stage5_add/shards_list.txt"
  shard_num: 53
  huawei_path: "/mnt/sfs/asr/emotion_stage5_add/shards_list.txt"
  weight: 0.1
emotion_meld:
  path: "/home/A02_tmpdata3/osum_s2s/emotion_meld/shards_list.txt"
  lab_path: "/home/xlgeng/sdb2/emotion_meld/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/emotion_meld/shards_list.txt"  # 8h
  shard_num: 9
  weight: 1
# Disabled entry, kept commented out:
# emotion_dis_fear_add_2025_1_15:
#   huawei_path: "/mnt/sfs/asr/update_data/emotion_dis_fear_add_2025_1_15/shards_list.txt"
#   weight: 0
emotion_lucy_Q_added_2025_4_9:
  path: "/home/A02_tmpdata3/osum_s2s/s2s_lucy_Q_emotion/shards_list.txt"
  shard_num: 121
  lab_path: "/home/work_nfs11/cywang/data/shard/emotion/QEmo_Q_train/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/emotion_lucy_Q_added_2025_4_9/shards_list.txt"
  weight: 0.5
# Age / gender understanding datasets. Each entry gives a shards_list.txt under
# `path` / `lab_path` / `huawei_path`; `shard_num` is present on some entries only.
# NOTE(review): entries from age_gender_stage2_add onward have no `shard_num` —
# confirm the loader treats it as optional.
Age_with_noize_add_2025_2_4:  # incomplete, only 245
  path: "/home/A02_tmpdata3/osum_s2s/age_3000_noize/shards_list.txt"
  lab_path: "/home/work_nfs6/syliu/for_gxl/Age/simu_age/shards_list.txt"
  shard_num: 2720
  huawei_path: "/mnt/sfs/asr/update_data/Age_with_noize_add_2025_2_4/shards_list.txt"
  weight: 0.1
age:
  path: "/home/A02_tmpdata3/osum_s2s/age_3000/shards_list.txt"
  lab_path: "/home/work_nfs3/syliu/for_gxl/Age/age/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/age/shards_list.txt"
  shard_num: 2820
  weight: 0.1  # 1.5 # 3000h
gender:  # incomplete, currently 310
  shard_num: 1738
  lab_path: "/home/xlgeng/sdb2/gender/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/sex/shards_list.txt"  # 3000
  path: "/home/A02_tmpdata3/osum_s2s/gender/shards_list.txt"
  weight: 0.1  # 1.5
gender_add_2025_1_6_kaggle:  # complete
  shard_num: 116
  path: "/home/A02_tmpdata3/osum_s2s/gender_kaggle/shards_list.txt"
  lab_path: "/home/work_nfs3/syliu/for_gxl/new_gender/Sex/sex/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/sex_2025_1_6_newadd/shards_list.txt"  # 107h, kaggle
  weight: 0.1  # 3
gender_add_2025_2_4_fix:  # 2100tar # incomplete, 365
  path: "/home/A02_tmpdata3/osum_s2s/gender_add_2025_2_4_fix/shards_list.txt"
  shard_num: 2140
  lab_path: "/home/work_nfs6/xlgeng/for_gxl/gender_add_2025_2_4_fix/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/gender_add_2025_2_4_fix/shards_list.txt"
  weight: 0.1
gender_with_noize_add_2025_2_4:  # 1500h, 780tar # incomplete, 266
  path: "/home/A02_tmpdata3/osum_s2s/gender_with_noize_add_2025_2_4/shards_list.txt"
  lab_path: "/home/work_nfs6/xlgeng/for_gxl/gender_with_noize_add_2025_2_4/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/gender_with_noize_add_2025_2_4/shards_list.txt"
  shard_num: 780
  weight: 0.1
age_gender_stage2_add:
  path: "/home/A02_tmpdata3/osum_s2s/age_gender_stage2_add/shards_list.txt"
  lab_path: "/home/xlgeng/sdb2/age_gender_stage2_add/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/Speech_Age_Sex/shards_list.txt"
  weight: 0.1  # 174h
age_gender_add_2025_1_13:
  path: "/home/A02_tmpdata3/osum_s2s/age_gender_add_2025_1_13/shards_list.txt"
  lab_path: "/home/work_nfs3/syliu/for_gxl/Age_Sex/age_sex/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/Speech_Age_Sex_add_2025_1_13/shards_list.txt"
  weight: 0.1  # 2571h
style_age_gender_stage3_add:
  path: "/home/A02_tmpdata3/osum_s2s/style_age_gender_stage3_add/shards_list.txt"
  lab_path: "/home/xlgeng/sdb2/style_age_gender_stage3_add/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/Speech_Style_Age_Sex/shards_list.txt"
  weight: 0.1  # 85h
age_gender_pure_stage3_add:
  path: "/home/A02_tmpdata3/osum_s2s/age_gender_pure_stage3_add/shards_list.txt"
  lab_path: "/home/xlgeng/sdb2/age_gender_pure_stage3_add/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/Age_Sex/shards_list.txt"
  weight: 0.1  # 174h
style_age_gender_pure_stage3_add:
  path: "/home/A02_tmpdata3/osum_s2s/style_age_gender_pure_stage3_add/shards_list.txt"
  lab_path: "/home/xlgeng/sdb2/style_age_gender_pure_stage3_add/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/Style_Age_Sex/shards_list.txt"
  weight: 0.1  # 85h
# Multi-task, caption. Each entry gives a shards_list.txt under
# `path` / `lab_path` / `huawei_path` plus a `weight` (presumably a sampling
# weight — TODO confirm with the loader).
merged_output_caption_age_gender_add_2025_2_26:
  path: "/home/A02_tmpdata3/osum_s2s/merged_output_caption_age_gender_add_2025_2_26/shards_list.txt"
  lab_path: "/home/work_nfs7/yacao/0106_twj_shard/shards_0226/merged_output/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/multi_task/caption_new/merged_output/shards_list.txt"
  weight: 0.1
nfs10_time1_output_caption_age_gender_add_2025_2_26:
  path: "/home/A02_tmpdata3/osum_s2s/nfs10_time1_output_caption_age_gender_add_2025_2_26/shards_list.txt"
  lab_path: "/home/work_nfs7/yacao/0106_twj_shard/shards_0226/nfs10_time1/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/multi_task/caption_new/nfs10_time1/shards_list.txt"
  weight: 0.1
other_20000_caption_age_gender_add_2025_2_26:
  path: "/home/A02_tmpdata3/osum_s2s/other_20000_caption_age_gender_add_2025_2_26/shards_list.txt"
  lab_path: "/home/work_nfs7/yacao/0106_twj_shard/shards_0226/other_20000/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/multi_task/caption_new/other_20000/shards_list.txt"
  weight: 0.1
simu9_1227_caption_age_gender_add_2025_2_26:
  path: "/home/A02_tmpdata3/osum_s2s/simu9_1227_caption_age_gender_add_2025_2_26/shards_list.txt"
  lab_path: "/home/work_nfs7/yacao/0106_twj_shard/shards_0226/simu9_1227/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/multi_task/caption_new/simu9_1227/shards_list.txt"
  weight: 0.1
# Multi-task, emotion. Each entry gives a shards_list.txt under
# `path` / `lab_path` / `huawei_path` plus a `weight`.
merged_output_emotion_age_gender_add_2025_3_2:
  path: "/home/A02_tmpdata3/osum_s2s/merged_output_emotion_age_gender_add_2025_3_2/shards_list.txt"
  lab_path: "/home/work_nfs16/emotion_data/OSUM_age_gender/emotion_age_gender1/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/multi_task/emotion_age_gender1/shards_list.txt"
  weight: 0.1
merged_output_emotion_age_gender_add_2025_3_2_di2pi:
  path: "/home/A02_tmpdata3/osum_s2s/merged_output_emotion_age_gender_add_2025_3_2_di2pi/shards_list.txt"
  shard_num: 181
  lab_path: "/home/work_nfs16/emotion_data/OSUM_age_gender/emotion_age_gender2/shards_list.txt"
  # NOTE(review): empty string, not a real path — confirm the loader never
  # reads `huawei_path` for this entry.
  huawei_path: ""
  weight: 0.1
# Multi-task, style. Each entry gives a shards_list.txt under
# `path` / `lab_path` / `huawei_path`; `shard_num` and `weight` appear on
# some entries only.
merged_output_style_age_gender_add_2025_3_2:
  path: "/home/A02_tmpdata3/osum_s2s/merged_output_style_age_gender_add_2025_3_2/shards_list.txt"
  lab_path: "/home/node54_tmpdata2/gjli/style_age_gender_data/style_labeling_100wto200w_part1_age_gender/shards_list.txt"
  shard_num: 107
  huawei_path: "/mnt/sfs/asr/update_data/multi_task/style_labeling_100wto200w_part1_age_gender/shards_list.txt"
  weight: 0.1
merged_output_style_origin_tts_age_gender_add_2025_3_2:
  path: "/home/A02_tmpdata3/osum_s2s/merged_output_style_origin_tts_age_gender_add_2025_3_2/shards_list.txt"
  lab_path: "/home/node54_tmpdata2/gjli/style_age_gender_data/style_origin_tts_age_gender/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/multi_task/style_origin_tts_age_gender/shards_list.txt"
  weight: 0.1
style_labeling_100wto200w_part1_age_gender_emotion_gjli:
  path: "/home/A02_tmpdata3/osum_s2s/style_labeling_100wto200w_part1_age_gender_emotion_gjli/shards_list.txt"
  lab_path: "/home/node54_tmpdata2/gjli/style_labeling_100wto200w_part1_age_gender_emotion/shards_list.txt"  # 107
  huawei_path: "/mnt/sfs/asr/update_data/style_labeling_100wto200w_part1_age_gender_emotion/shards_list.txt"  # 107tar
  weight: 0.5
style_labeling_200wto300w_part1_age_gender_emotion_gjli:
  path: "/home/A02_tmpdata3/osum_s2s/style_labeling_200wto300w_part1_age_gender_emotion_gjli/shards_list.txt"
  lab_path: "/home/node54_tmpdata2/gjli/style_labeling_200wto300w_part2/shards_list.txt"
  shard_num: 236
  # NOTE(review): "_" is a placeholder, not a real path, and this entry has no
  # `weight` unlike its siblings — confirm the loader's handling of both.
  huawei_path: "_"
age_gender_style_emotion1_add_2025_3_29_zxzhao:
  path: "/home/A02_tmpdata3/osum_s2s/age_gender_style_emotion1_add_2025_3_29_zxzhao/shards_list.txt"
  lab_path: "/home/work_nfs16/emotion_data/OSUM_age_gender/age_gender_style_emotion1/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/age_gender_style_emotion1_add_2025_3_29_zxzhao/shards_list.txt"  # 256tar
  weight: 0.5
5_label_caption_age_gender_style_emotion_added_2025_3_29_yacao:
  path: "/home/A02_tmpdata3/osum_s2s/5_label_caption_age_gender_style_emotion_added_2025_3_29_yacao/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/5_label_caption_age_gender_style_emotion_added_2025_3_29_yacao/shards_list.txt"  # 270tar
  lab_path: "/home/work_nfs7/yacao/0320_multilabel_2/shard/5_label/shards_list.txt"
  weight: 0.5
# Audio description data. Each entry gives a shards_list.txt under
# `path` / `lab_path` / `huawei_path` plus a `weight`.
audio_caption_by_wjtian_added_by_20250414:  # actually 20250411; the date was mistyped because of autocomplete
  path: "/home/A02_tmpdata3/osum_s2s/audio_caption_by_wjtian_added_by_20250414/shards_list.txt"
  lab_path: "/home/work_nfs7/cywang/OSUM/OSUM_data/shard/audio_caption/audio_caption/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/audio_caption_by_wjtian_added_by_20250414/shards_list.txt"  # 2155 tar
  weight: 0.15  # audio_caption (audio description) data prepared by Tian Wenjie; upload started
S2SChat_MMAU_training_all_by_wjtian_added_by_20250708:
  path: "/home/A02_tmpdata3/osum_s2s/S2SChat_MMAU_training_all_by_wjtian_added_by_20250708/shards_list.txt"
  lab_path: "/home/work_nfs11/cywang/data/shard/S2Chat/MMAU-training-all/shards_list.txt"
  huawei_path: "/mnt/sfs/asr/update_data/S2SChat_MMAU_training_all_by_wjtian_added_by_20250708/shards_list.txt"  # 1000 tar
  shard_num: 22
  weight: 5