# SingingSDS/config/interface/options.yaml
# Last recorded change (3166c53): change bilingual svs to use
# espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
# Speech-recognition models selectable in the interface.
# Every entry carries a model identifier (`id`) and a label (`name`);
# `name` is presumably what the UI displays -- confirm against the consumer.
asr_models:
  # OpenAI Whisper checkpoints.
  - id: openai/whisper-large-v3-turbo
    name: Whisper large-v3-turbo
  - id: openai/whisper-large-v3
    name: Whisper large-v3
  - id: openai/whisper-medium
    name: Whisper medium
  - id: openai/whisper-small
    name: Whisper small
  # FunASR Paraformer checkpoint (the "-zh" suffix suggests Chinese -- confirm).
  - id: funasr/paraformer-zh
    name: Paraformer-zh
# Language models selectable in the interface.
# Same entry shape as the other model lists: `id` plus a `name` label.
llm_models:
  # NOTE(review): the only id without an "org/" prefix -- presumably resolved
  # differently from the hub-style ids below; confirm against the loader.
  - id: gemini-2.5-flash
    name: Gemini 2.5 Flash
  - id: google/gemma-2-2b
    name: Gemma 2 2B
  - id: meta-llama/Llama-3.2-3B-Instruct
    name: Llama 3.2 3B Instruct
  - id: meta-llama/Llama-3.1-8B-Instruct
    name: Llama 3.1 8B Instruct
  - id: Qwen/Qwen3-8B
    name: Qwen3 8B
  - id: Qwen/Qwen3-30B-A3B
    name: Qwen3 30B A3B
# Singing-voice-synthesis models selectable in the interface.
# Entry fields:
#   id         -- unique key; here always "<lang>-<model_path>".
#   name       -- label for the entry.
#   model_path -- model reference to load.
#   lang       -- language the entry is configured for.
#   voices     -- voice key -> voice selector. The selector is either a path
#                 to a singer-embedding .npy file or a bare integer
#                 (presumably a speaker index for the multi-singer model --
#                 confirm against the consumer).
svs_models:
  # The two bilingual entries share one model_path and one voice set; they
  # differ only in `id`, `name`, and `lang`.
  - id: mandarin-espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
    name: Visinger2 (Bilingual)-zh
    model_path: espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
    lang: mandarin
    voices:
      voice1: resources/singer/singer_embedding_ace-2.npy
      voice2: resources/singer/singer_embedding_ace-8.npy
      voice3: resources/singer/singer_embedding_itako.npy
      voice4: resources/singer/singer_embedding_kising_orange.npy
      voice5: resources/singer/singer_embedding_m4singer_Alto-4.npy

  - id: japanese-espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
    name: Visinger2 (Bilingual)-jp
    model_path: espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
    lang: japanese
    voices:
      voice1: resources/singer/singer_embedding_ace-2.npy
      voice2: resources/singer/singer_embedding_ace-8.npy
      voice3: resources/singer/singer_embedding_itako.npy
      voice4: resources/singer/singer_embedding_kising_orange.npy
      voice5: resources/singer/singer_embedding_m4singer_Alto-4.npy

  # Mandarin-only model; its voices are integers rather than embedding files.
  - id: mandarin-espnet/aceopencpop_svs_visinger2_40singer_pretrain
    name: Visinger2 (Chinese)
    model_path: espnet/aceopencpop_svs_visinger2_40singer_pretrain
    lang: mandarin
    voices:
      voice1: 5
      voice2: 8
      voice3: 12
      voice4: 15
      voice5: 29
# Melody sources selectable in the interface.
# Entry fields:
#   id   -- unique key for the source.
#   name -- label for the entry.
#   desc -- one-sentence description of where the melody comes from.
melody_sources:
  - id: gen-random-none
    name: Random Generation
    desc: "Melody is generated without any structure or reference."
  - id: sample-note-kising
    name: Sampled Melody (KiSing)
    desc: "Melody is retrieved from KiSing dataset."
  - id: sample-note-touhou
    name: Sampled Melody (Touhou)
    desc: "Melody is retrieved from Touhou dataset."
  - id: sample-lyric-kising
    # NOTE(review): "Kising" here vs "KiSing" in sample-note-kising. The name
    # is left untouched because it may be used as a lookup key -- confirm,
    # then align the capitalization.
    name: Sampled Melody with Lyrics (Kising)
    desc: "Melody with aligned lyrics is sampled from KiSing dataset."
  - id: sample-lyric-genre
    name: Sampled Melody with Lyrics (Synthetic)
    # The previous description was a copy of the sample-lyric-kising one and
    # contradicted this entry's name.
    # NOTE(review): confirm the exact name of the synthetic dataset.
    desc: "Melody with aligned lyrics is sampled from a synthetic dataset."